gwaslab 3.4.35__tar.gz → 3.4.37__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- {gwaslab-3.4.35/src/gwaslab.egg-info → gwaslab-3.4.37}/PKG-INFO +5 -6
- {gwaslab-3.4.35 → gwaslab-3.4.37}/README.md +1 -1
- {gwaslab-3.4.35 → gwaslab-3.4.37}/pyproject.toml +4 -5
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/__init__.py +1 -1
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/bd_common_data.py +4 -2
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/g_Sumstats.py +56 -33
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/g_meta.py +13 -3
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/g_version.py +2 -2
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/hm_harmonize_sumstats.py +43 -18
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/io_preformat_input.py +9 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/qc_check_datatype.py +14 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/qc_fix_sumstats.py +278 -119
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_ex_process_h5.py +26 -17
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_in_fill_data.py +50 -12
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_aux_quickfix.py +53 -52
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_plot_compare_effect.py +27 -8
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_plot_forestplot.py +1 -1
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_plot_mqqplot.py +127 -48
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_plot_regionalplot.py +20 -9
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_plot_rg_heatmap.py +8 -4
- {gwaslab-3.4.35 → gwaslab-3.4.37/src/gwaslab.egg-info}/PKG-INFO +5 -6
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab.egg-info/requires.txt +2 -3
- {gwaslab-3.4.35 → gwaslab-3.4.37}/LICENSE +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/setup.cfg +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/bd_config.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/bd_download.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/bd_get_hapmap3.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/data/chrx_par/chrx_par_hg19.bed.gz +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/data/chrx_par/chrx_par_hg38.bed.gz +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/data/formatbook.json +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/data/reference.json +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/g_Log.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/g_Phenotypes.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/g_SumstatsPair.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/g_SumstatsT.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/g_Sumstats_summary.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/g_vchange_status.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/hm_casting.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/hm_rsid_to_chrpos.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/io_read_ldsc.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/io_read_tabular.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/io_to_formats.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/io_to_pickle.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/run_script.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_ex_calculate_ldmatrix.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_ex_calculate_prs.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_ex_gwascatalog.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_ex_ldproxyfinder.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_ex_plink_filter.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_ex_process_ref.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_ex_run_2samplemr.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_ex_run_clumping.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_ex_run_coloc.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_ex_run_susie.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_in_calculate_gc.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_in_calculate_power.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_in_convert_h2.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_in_correct_winnerscurse.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_in_filter_value.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_in_get_density.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/util_in_get_sig.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_aux_annotate_plot.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_aux_reposition_text.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_aux_save_figure.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_plot_compare_af.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_plot_miamiplot.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_plot_miamiplot2.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_plot_qqplot.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_plot_stackedregional.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab/viz_plot_trumpetplot.py +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab.egg-info/SOURCES.txt +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab.egg-info/dependency_links.txt +0 -0
- {gwaslab-3.4.35 → gwaslab-3.4.37}/src/gwaslab.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: gwaslab
|
|
3
|
-
Version: 3.4.35
|
|
3
|
+
Version: 3.4.37
|
|
4
4
|
Summary: A collection of handy tools for GWAS SumStats
|
|
5
5
|
Author-email: Yunye <yunye@gwaslab.com>
|
|
6
6
|
Project-URL: Homepage, https://cloufield.github.io/gwaslab/
|
|
@@ -8,19 +8,18 @@ Project-URL: Github, https://github.com/Cloufield/gwaslab
|
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python: <=3.10,>=3.
|
|
11
|
+
Requires-Python: <=3.10,>=3.9
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
-
Requires-Dist: pandas!=1.5
|
|
14
|
+
Requires-Dist: pandas!=1.5,>=1.3
|
|
15
15
|
Requires-Dist: numpy>=1.21.2
|
|
16
16
|
Requires-Dist: matplotlib!=3.7.2,>=3.5
|
|
17
17
|
Requires-Dist: seaborn>=0.11.1
|
|
18
|
-
Requires-Dist: scipy>=1.
|
|
18
|
+
Requires-Dist: scipy>=1.12
|
|
19
19
|
Requires-Dist: pySAM<0.20,>=0.18.1
|
|
20
20
|
Requires-Dist: Biopython>=1.79
|
|
21
21
|
Requires-Dist: adjustText<=0.8,>=0.7.3
|
|
22
22
|
Requires-Dist: liftover>=1.1.13
|
|
23
|
-
Requires-Dist: statsmodels==0.13
|
|
24
23
|
Requires-Dist: scikit-allel>=1.3.5
|
|
25
24
|
Requires-Dist: pyensembl==2.2.3
|
|
26
25
|
Requires-Dist: gtfparse==1.3.0
|
|
@@ -45,7 +44,7 @@ Note: GWASLab is being updated very frequently for now. I will release the first
|
|
|
45
44
|
## Install
|
|
46
45
|
|
|
47
46
|
```
|
|
48
|
-
pip install gwaslab==3.4.
|
|
47
|
+
pip install gwaslab==3.4.35
|
|
49
48
|
```
|
|
50
49
|
|
|
51
50
|
|
|
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
|
|
|
7
7
|
|
|
8
8
|
[project]
|
|
9
9
|
name = "gwaslab"
|
|
10
|
-
version = "3.4.35"
|
|
10
|
+
version = "3.4.37"
|
|
11
11
|
authors = [
|
|
12
12
|
{ name="Yunye", email="yunye@gwaslab.com" },
|
|
13
13
|
]
|
|
@@ -16,22 +16,21 @@ description = "A collection of handy tools for GWAS SumStats"
|
|
|
16
16
|
readme = "README.md"
|
|
17
17
|
|
|
18
18
|
dependencies = [
|
|
19
|
-
"pandas>=1.3,!=1.5
|
|
19
|
+
"pandas>=1.3,!=1.5",
|
|
20
20
|
"numpy>=1.21.2",
|
|
21
21
|
"matplotlib>=3.5,!=3.7.2",
|
|
22
22
|
"seaborn>=0.11.1",
|
|
23
|
-
"scipy>=1.
|
|
23
|
+
"scipy>=1.12",
|
|
24
24
|
"pySAM>=0.18.1,<0.20",
|
|
25
25
|
"Biopython>=1.79",
|
|
26
26
|
"adjustText>=0.7.3, <=0.8",
|
|
27
27
|
"liftover>=1.1.13",
|
|
28
|
-
"statsmodels==0.13",
|
|
29
28
|
"scikit-allel>=1.3.5",
|
|
30
29
|
"pyensembl==2.2.3",
|
|
31
30
|
"gtfparse==1.3.0"
|
|
32
31
|
]
|
|
33
32
|
|
|
34
|
-
requires-python = ">=3.
|
|
33
|
+
requires-python = ">=3.9,<=3.10"
|
|
35
34
|
classifiers = [
|
|
36
35
|
"Programming Language :: Python :: 3",
|
|
37
36
|
"License :: OSI Approved :: MIT License",
|
|
@@ -42,6 +42,6 @@ from gwaslab.util_in_calculate_power import get_power
|
|
|
42
42
|
from gwaslab.util_in_calculate_power import get_beta
|
|
43
43
|
from gwaslab.viz_plot_trumpetplot import plot_power
|
|
44
44
|
from gwaslab.viz_plot_trumpetplot import plot_power_x
|
|
45
|
-
from gwaslab.util_ex_process_h5 import
|
|
45
|
+
from gwaslab.util_ex_process_h5 import process_vcf_to_hfd5
|
|
46
46
|
from gwaslab.util_ex_run_susie import _run_susie_rss as run_susie_rss
|
|
47
47
|
from gwaslab.io_read_tabular import _read_tabular as read_tabular
|
|
@@ -146,12 +146,14 @@ def get_number_to_NC(build,inverse=False):
|
|
|
146
146
|
def get_NC_to_number(build):
|
|
147
147
|
return get_number_to_NC(build=build,inverse=True)
|
|
148
148
|
|
|
149
|
-
def get_chr_list(add_number=False,n=25):
|
|
149
|
+
def get_chr_list(add_number=False,n=25,only_number=False):
|
|
150
150
|
chrom_list=[str(i) for i in range(1,n+1)]+["X","Y","M","MT"]
|
|
151
151
|
|
|
152
|
-
if add_number
|
|
152
|
+
if add_number == True:
|
|
153
153
|
chrom_list = [str(i) for i in range(1,n+1)] + ["X","Y","M","MT"] + [i for i in range(1,n+1)]
|
|
154
154
|
|
|
155
|
+
if only_number ==True:
|
|
156
|
+
chrom_list = [i for i in range(1,n+1)]
|
|
155
157
|
return chrom_list
|
|
156
158
|
|
|
157
159
|
def get_chr_to_number(out_chr=False,xymt=["X","Y","MT"],xymt_num=[23,24,25]):
|
|
@@ -52,7 +52,8 @@ from gwaslab.bd_common_data import get_format_dict
|
|
|
52
52
|
from gwaslab.bd_common_data import get_formats_list
|
|
53
53
|
from gwaslab.g_version import _show_version
|
|
54
54
|
from gwaslab.g_version import gwaslab_info
|
|
55
|
-
from gwaslab.g_meta import init_meta
|
|
55
|
+
from gwaslab.g_meta import _init_meta
|
|
56
|
+
from gwaslab.g_meta import _append_meta_record
|
|
56
57
|
from gwaslab.util_ex_run_clumping import _clump
|
|
57
58
|
from gwaslab.util_ex_calculate_ldmatrix import tofinemapping
|
|
58
59
|
from gwaslab.util_ex_calculate_prs import _calculate_prs
|
|
@@ -60,6 +61,7 @@ from gwaslab.viz_plot_mqqplot import mqqplot
|
|
|
60
61
|
from gwaslab.viz_plot_trumpetplot import plottrumpet
|
|
61
62
|
from gwaslab.viz_plot_compare_af import plotdaf
|
|
62
63
|
from gwaslab.util_ex_run_susie import _run_susie_rss
|
|
64
|
+
from gwaslab.qc_fix_sumstats import _check_data_consistency
|
|
63
65
|
import gc
|
|
64
66
|
|
|
65
67
|
#20220309
|
|
@@ -119,10 +121,9 @@ class Sumstats():
|
|
|
119
121
|
self.log = Log()
|
|
120
122
|
|
|
121
123
|
# meta information
|
|
122
|
-
self.meta =
|
|
124
|
+
self.meta = _init_meta()
|
|
123
125
|
self.build = build
|
|
124
|
-
self.meta["gwaslab"]["study_name"] =
|
|
125
|
-
#self.meta["gwaslab"]["genome_build"] = build
|
|
126
|
+
self.meta["gwaslab"]["study_name"] = study
|
|
126
127
|
self.meta["gwaslab"]["species"] = species
|
|
127
128
|
|
|
128
129
|
# initialize attributes for clumping and finmapping
|
|
@@ -217,8 +218,22 @@ class Sumstats():
|
|
|
217
218
|
return lookupstatus(self.data[status])
|
|
218
219
|
|
|
219
220
|
def set_build(self, build, verbose=True):
|
|
220
|
-
self.data = _set_build(self.data, build=build, log=self.log,verbose=verbose)
|
|
221
|
+
self.data, self.meta["gwaslab"]["genome_build"] = _set_build(self.data, build=build, log=self.log,verbose=verbose)
|
|
221
222
|
gc.collect()
|
|
223
|
+
|
|
224
|
+
def infer_build(self,**args):
|
|
225
|
+
self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data,**args)
|
|
226
|
+
|
|
227
|
+
def liftover(self,to_build, from_build=None,**args):
|
|
228
|
+
if from_build is None:
|
|
229
|
+
if self.meta["gwaslab"]["genome_build"]=="99":
|
|
230
|
+
self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data,**args)
|
|
231
|
+
from_build = self.meta["gwaslab"]["genome_build"]
|
|
232
|
+
self.data = parallelizeliftovervariant(self.data,from_build=from_build, to_build=to_build, log=self.log,**args)
|
|
233
|
+
self.meta["is_sorted"] = False
|
|
234
|
+
self.meta["is_harmonised"] = False
|
|
235
|
+
self.meta["gwaslab"]["genome_build"]=to_build
|
|
236
|
+
|
|
222
237
|
# QC ######################################################################################
|
|
223
238
|
#clean the sumstats with one line
|
|
224
239
|
def basic_check(self,
|
|
@@ -231,20 +246,23 @@ class Sumstats():
|
|
|
231
246
|
fixpos_args={},
|
|
232
247
|
fixallele_args={},
|
|
233
248
|
sanitycheckstats_args={},
|
|
249
|
+
consistencycheck_args={},
|
|
234
250
|
normalize=True,
|
|
235
251
|
normalizeallele_args={},
|
|
236
252
|
verbose=True):
|
|
237
253
|
###############################################
|
|
238
254
|
# try to fix data without dropping any information
|
|
239
255
|
self.data = fixID(self.data,verbose=verbose, **fixid_args)
|
|
240
|
-
if remove_dup is True:
|
|
241
|
-
self.data = removedup(self.data,log=self.log,verbose=verbose,**removedup_args)
|
|
242
256
|
self.data = fixchr(self.data,log=self.log,remove=remove,verbose=verbose,**fixchr_agrs)
|
|
243
257
|
self.data = fixpos(self.data,log=self.log,remove=remove,verbose=verbose,**fixpos_args)
|
|
244
258
|
self.data = fixallele(self.data,log=self.log,remove=remove,verbose=verbose,**fixallele_args)
|
|
245
259
|
self.data = sanitycheckstats(self.data,log=self.log,verbose=verbose,**sanitycheckstats_args)
|
|
260
|
+
_check_data_consistency(self.data,log=self.log,verbose=verbose,**consistencycheck_args)
|
|
261
|
+
|
|
246
262
|
if normalize is True:
|
|
247
263
|
self.data = parallelnormalizeallele(self.data,n_cores=n_cores,verbose=verbose,log=self.log,**normalizeallele_args)
|
|
264
|
+
if remove_dup is True:
|
|
265
|
+
self.data = removedup(self.data,log=self.log,verbose=verbose,**removedup_args)
|
|
248
266
|
self.data = sortcoordinate(self.data,verbose=verbose,log=self.log)
|
|
249
267
|
self.data = sortcolumn(self.data,verbose=verbose,log=self.log)
|
|
250
268
|
self.meta["is_sorted"] = True
|
|
@@ -329,9 +347,9 @@ class Sumstats():
|
|
|
329
347
|
|
|
330
348
|
self.data= parallelinferstrand(self.data,ref_infer = ref_infer,ref_alt_freq=ref_alt_freq,maf_threshold=maf_threshold,
|
|
331
349
|
n_cores=n_cores,log=self.log,**inferstrand_args)
|
|
332
|
-
|
|
333
|
-
self.meta["gwaslab"]["references"]["ref_infer"] = ref_infer
|
|
334
350
|
|
|
351
|
+
self.meta["gwaslab"]["references"]["ref_infer"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_infer"] , ref_infer)
|
|
352
|
+
|
|
335
353
|
self.data =flipallelestats(self.data,log=self.log,**flipallelestats_args)
|
|
336
354
|
|
|
337
355
|
gc.collect()
|
|
@@ -341,13 +359,18 @@ class Sumstats():
|
|
|
341
359
|
|
|
342
360
|
self.data = parallelizeassignrsid(self.data,path=ref_rsid_tsv,ref_mode="tsv",
|
|
343
361
|
n_cores=n_cores,log=self.log,**assignrsid_args)
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
|
|
344
365
|
self.meta["gwaslab"]["references"]["ref_rsid_tsv"] = ref_rsid_tsv
|
|
345
366
|
gc.collect()
|
|
367
|
+
|
|
346
368
|
if ref_rsid_vcf is not None:
|
|
347
|
-
|
|
348
369
|
self.data = parallelizeassignrsid(self.data,path=ref_rsid_vcf,ref_mode="vcf",
|
|
349
370
|
n_cores=n_cores,log=self.log,**assignrsid_args)
|
|
350
|
-
|
|
371
|
+
|
|
372
|
+
self.meta["gwaslab"]["references"]["ref_rsid_vcf"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_rsid_vcf"] , ref_rsid_vcf)
|
|
373
|
+
|
|
351
374
|
gc.collect()
|
|
352
375
|
######################################################
|
|
353
376
|
if remove is True:
|
|
@@ -376,17 +399,23 @@ class Sumstats():
|
|
|
376
399
|
self.data = removedup(self.data,log=self.log,**args)
|
|
377
400
|
def check_sanity(self,**args):
|
|
378
401
|
self.data = sanitycheckstats(self.data,log=self.log,**args)
|
|
379
|
-
|
|
402
|
+
def check_data_consistency(self, **args):
|
|
403
|
+
_check_data_consistency(self.data,log=self.log,**args)
|
|
380
404
|
def check_id(self,**args):
|
|
381
405
|
pass
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
self.data =
|
|
406
|
+
|
|
407
|
+
def check_ref(self,ref_seq,**args):
|
|
408
|
+
self.meta["gwaslab"]["references"]["ref_seq"] = ref_seq
|
|
409
|
+
self.data = checkref(self.data,ref_seq,log=self.log,**args)
|
|
410
|
+
def infer_strand(self,ref_infer,**args):
|
|
411
|
+
self.meta["gwaslab"]["references"]["ref_infer"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_infer"] , ref_infer)
|
|
412
|
+
self.data = parallelinferstrand(self.data,ref_infer=ref_infer,log=self.log,**args)
|
|
413
|
+
|
|
386
414
|
def flip_allele_stats(self,**args):
|
|
387
415
|
self.data = flipallelestats(self.data,log=self.log,**args)
|
|
388
416
|
def normalize_allele(self,**args):
|
|
389
417
|
self.data = parallelnormalizeallele(self.data,log=self.log,**args)
|
|
418
|
+
|
|
390
419
|
def assign_rsid(self,
|
|
391
420
|
ref_rsid_tsv=None,
|
|
392
421
|
ref_rsid_vcf=None,
|
|
@@ -396,21 +425,15 @@ class Sumstats():
|
|
|
396
425
|
self.meta["gwaslab"]["references"]["ref_rsid_tsv"] = ref_rsid_tsv
|
|
397
426
|
if ref_rsid_vcf is not None:
|
|
398
427
|
self.data = parallelizeassignrsid(self.data,path=ref_rsid_vcf,ref_mode="vcf",log=self.log,**args)
|
|
399
|
-
self.meta["gwaslab"]["references"]["ref_rsid_vcf"] = ref_rsid_vcf
|
|
428
|
+
self.meta["gwaslab"]["references"]["ref_rsid_vcf"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_rsid_vcf"] , ref_rsid_vcf)
|
|
429
|
+
|
|
400
430
|
def rsid_to_chrpos(self,**args):
|
|
401
431
|
self.data = rsidtochrpos(self.data,log=self.log,**args)
|
|
432
|
+
|
|
402
433
|
def rsid_to_chrpos2(self,**args):
|
|
403
434
|
self.data = parallelrsidtochrpos(self.data,log=self.log,**args)
|
|
404
435
|
|
|
405
|
-
|
|
406
|
-
if from_build is None:
|
|
407
|
-
if self.meta["gwaslab"]["genome_build"]=="99":
|
|
408
|
-
self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data,**args)
|
|
409
|
-
from_build = self.meta["gwaslab"]["genome_build"]
|
|
410
|
-
self.data = parallelizeliftovervariant(self.data,from_build=from_build, to_build=to_build, log=self.log,**args)
|
|
411
|
-
self.meta["is_sorted"] = False
|
|
412
|
-
self.meta["is_harmonised"] = False
|
|
413
|
-
self.meta["gwaslab"]["genome_build"]=to_build
|
|
436
|
+
|
|
414
437
|
############################################################################################################
|
|
415
438
|
|
|
416
439
|
def sort_coordinate(self,**sort_args):
|
|
@@ -420,11 +443,10 @@ class Sumstats():
|
|
|
420
443
|
self.data = sortcolumn(self.data,log=self.log,**args)
|
|
421
444
|
|
|
422
445
|
############################################################################################################
|
|
423
|
-
def fill_data(self, **args):
|
|
424
|
-
self.data = filldata(self.data
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data,**args)
|
|
446
|
+
def fill_data(self, verbose=True, **args):
|
|
447
|
+
self.data = filldata(self.data, verbose=verbose, **args)
|
|
448
|
+
self.data = sortcolumn(self.data, verbose=verbose, log=self.log)
|
|
449
|
+
|
|
428
450
|
# utilities ############################################################################################################
|
|
429
451
|
# filter series ######################################################################
|
|
430
452
|
def get_flanking(self, inplace=False,**args):
|
|
@@ -485,11 +507,12 @@ class Sumstats():
|
|
|
485
507
|
|
|
486
508
|
def check_af(self,ref_infer,**args):
|
|
487
509
|
self.data = parallelecheckaf(self.data,ref_infer=ref_infer,log=self.log,**args)
|
|
488
|
-
self.meta["gwaslab"]["references"]["ref_infer_daf"] = ref_infer
|
|
489
|
-
|
|
510
|
+
self.meta["gwaslab"]["references"]["ref_infer_daf"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_infer_daf"] , ref_infer)
|
|
511
|
+
|
|
490
512
|
def infer_af(self,ref_infer,**args):
|
|
491
513
|
self.data = paralleleinferaf(self.data,ref_infer=ref_infer,log=self.log,**args)
|
|
492
514
|
self.meta["gwaslab"]["references"]["ref_infer_af"] = ref_infer
|
|
515
|
+
self.meta["gwaslab"]["references"]["ref_infer_af"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_infer_af"] , ref_infer)
|
|
493
516
|
|
|
494
517
|
def plot_daf(self, **args):
|
|
495
518
|
fig,outliers = plotdaf(self.data, **args)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from gwaslab.g_version import gwaslab_info
|
|
2
2
|
|
|
3
|
-
def init_meta():
|
|
3
|
+
def _init_meta():
|
|
4
4
|
metadata = {"gwaslab":{
|
|
5
5
|
"gwaslab_version": gwaslab_info()["version"],
|
|
6
6
|
"study_name":"Sumstats_1",
|
|
@@ -23,7 +23,11 @@ def init_meta():
|
|
|
23
23
|
"ref_rsid_tsv":"Unknown",
|
|
24
24
|
"ref_rsid_vcf":"Unknown",
|
|
25
25
|
"ref_seq":"Unknown",
|
|
26
|
-
"ref_infer":"Unknown"
|
|
26
|
+
"ref_infer":"Unknown",
|
|
27
|
+
"ref_infer_af":"Unknown",
|
|
28
|
+
"ref_infer_daf":"Unknown",
|
|
29
|
+
"ref_rsid_to_chrpos_tsv":"Unknown",
|
|
30
|
+
"ref_rsid_to_chrpos_vcf":"Unknown"
|
|
27
31
|
}
|
|
28
32
|
},
|
|
29
33
|
"genotyping_technology":"Unknown",
|
|
@@ -45,4 +49,10 @@ def init_meta():
|
|
|
45
49
|
"coordinate_system":"1-based",
|
|
46
50
|
"sex": "M|F|combined"
|
|
47
51
|
}
|
|
48
|
-
return metadata.copy()
|
|
52
|
+
return metadata.copy()
|
|
53
|
+
|
|
54
|
+
def _append_meta_record(old, new):
|
|
55
|
+
if old == "Unknown" or old== "Unchecked":
|
|
56
|
+
return new
|
|
57
|
+
else:
|
|
58
|
+
return "{}, {}".format(old, new)
|
|
@@ -11,6 +11,8 @@ import gc
|
|
|
11
11
|
from gwaslab.g_Log import Log
|
|
12
12
|
from gwaslab.qc_fix_sumstats import fixchr
|
|
13
13
|
from gwaslab.qc_fix_sumstats import fixpos
|
|
14
|
+
from gwaslab.qc_fix_sumstats import sortcolumn
|
|
15
|
+
from gwaslab.qc_check_datatype import check_dataframe_shape
|
|
14
16
|
from gwaslab.bd_common_data import get_number_to_chr
|
|
15
17
|
from gwaslab.bd_common_data import get_chr_list
|
|
16
18
|
from gwaslab.bd_common_data import get_chr_to_number
|
|
@@ -27,7 +29,7 @@ from gwaslab.g_version import _get_version
|
|
|
27
29
|
|
|
28
30
|
###~!!!!
|
|
29
31
|
def rsidtochrpos(sumstats,
|
|
30
|
-
path=
|
|
32
|
+
path=None, ref_rsid_to_chrpos_tsv=None, snpid="SNPID",
|
|
31
33
|
rsid="rsID", chrom="CHR",pos="POS",ref_rsid="rsID",ref_chr="CHR",ref_pos="POS", build="19",
|
|
32
34
|
overwrite=False,remove=False,chunksize=5000000,verbose=True,log=Log()):
|
|
33
35
|
'''
|
|
@@ -35,9 +37,12 @@ def rsidtochrpos(sumstats,
|
|
|
35
37
|
'''
|
|
36
38
|
#########################################################################################################
|
|
37
39
|
if verbose: log.write("Start to update chromosome and position information based on rsID...{}".format(_get_version()))
|
|
38
|
-
|
|
40
|
+
check_dataframe_shape(sumstats, log, verbose)
|
|
39
41
|
if verbose: log.write(" -rsID dictionary file: "+ path)
|
|
40
42
|
|
|
43
|
+
if ref_rsid_to_chrpos_tsv is not None:
|
|
44
|
+
path = ref_rsid_to_chrpos_tsv
|
|
45
|
+
|
|
41
46
|
if snpid in sumstats.columns and sum(sumstats[rsid].isna())>0:
|
|
42
47
|
if verbose: log.write(" -Filling na in rsID columns with SNPID...")
|
|
43
48
|
sumstats.loc[sumstats[rsid].isna(),rsid] = sumstats.loc[sumstats[rsid].isna(),snpid]
|
|
@@ -75,6 +80,7 @@ def rsidtochrpos(sumstats,
|
|
|
75
80
|
if verbose: log.write(" -Updating CHR and POS finished.Start to re-fixing CHR and POS... ")
|
|
76
81
|
sumstats = fixchr(sumstats,verbose=verbose)
|
|
77
82
|
sumstats = fixpos(sumstats,verbose=verbose)
|
|
83
|
+
sumstats = sortcolumn(sumstats,verbose=verbose)
|
|
78
84
|
return sumstats
|
|
79
85
|
####################################################################################################
|
|
80
86
|
|
|
@@ -96,9 +102,19 @@ def merge_chrpos(sumstats_part,all_groups_max,path,build,status):
|
|
|
96
102
|
return sumstats_part
|
|
97
103
|
|
|
98
104
|
|
|
99
|
-
def parallelrsidtochrpos(sumstats, rsid="rsID", chrom="CHR",pos="POS", path=None,build="99",status="STATUS",
|
|
105
|
+
def parallelrsidtochrpos(sumstats, rsid="rsID", chrom="CHR",pos="POS", path=None, ref_rsid_to_chrpos_vcf = None, ref_rsid_to_chrpos_hdf5 = None, build="99",status="STATUS",
|
|
100
106
|
n_cores=4,block_size=20000000,verbose=True,log=Log()):
|
|
107
|
+
|
|
108
|
+
if ref_rsid_to_chrpos_hdf5 is not None:
|
|
109
|
+
path = ref_rsid_to_chrpos_hdf5
|
|
110
|
+
elif ref_rsid_to_chrpos_vcf is not None:
|
|
111
|
+
vcf_file_name = os.path.basename(ref_rsid_to_chrpos_vcf)
|
|
112
|
+
vcf_dir_path = os.path.dirname(ref_rsid_to_chrpos_vcf)
|
|
113
|
+
path = "{}/{}.rsID_CHR_POS_groups_{}.h5".format(vcf_dir_path,vcf_file_name,int(block_size))
|
|
114
|
+
|
|
101
115
|
if verbose: log.write("Start to assign CHR and POS using rsIDs...{}".format(_get_version()))
|
|
116
|
+
check_dataframe_shape(sumstats, log, verbose)
|
|
117
|
+
|
|
102
118
|
if path is None:
|
|
103
119
|
raise ValueError("Please provide path to hdf5 file.")
|
|
104
120
|
|
|
@@ -164,13 +180,16 @@ def parallelrsidtochrpos(sumstats, rsid="rsID", chrom="CHR",pos="POS", path=None
|
|
|
164
180
|
# merge back
|
|
165
181
|
if verbose: log.write(" -Append data... ")
|
|
166
182
|
sumstats = pd.concat([sumstats_rs,sumstats_nonrs],ignore_index=True)
|
|
183
|
+
|
|
167
184
|
del sumstats_rs
|
|
168
185
|
del sumstats_nonrs
|
|
169
186
|
gc.collect()
|
|
170
187
|
|
|
171
188
|
# check
|
|
172
|
-
sumstats = fixchr(sumstats,verbose=
|
|
173
|
-
sumstats = fixpos(sumstats,verbose=
|
|
189
|
+
sumstats = fixchr(sumstats,verbose=verbose)
|
|
190
|
+
sumstats = fixpos(sumstats,verbose=verbose)
|
|
191
|
+
sumstats = sortcolumn(sumstats,verbose=verbose)
|
|
192
|
+
|
|
174
193
|
pool.close()
|
|
175
194
|
pool.join()
|
|
176
195
|
gc.collect()
|
|
@@ -235,7 +254,7 @@ def check_status(row,record):
|
|
|
235
254
|
|
|
236
255
|
def checkref(sumstats,ref_path,chrom="CHR",pos="POS",ea="EA",nea="NEA",status="STATUS",chr_dict=get_chr_to_number(),remove=False,verbose=True,log=Log()):
|
|
237
256
|
if verbose: log.write("Start to check if NEA is aligned with reference sequence...{}".format(_get_version()))
|
|
238
|
-
|
|
257
|
+
check_dataframe_shape(sumstats, log, verbose)
|
|
239
258
|
if verbose: log.write(" -Reference genome fasta file: "+ ref_path)
|
|
240
259
|
if verbose: log.write(" -Checking records: ", end="")
|
|
241
260
|
chromlist = get_chr_list(add_number=True)
|
|
@@ -381,7 +400,7 @@ def parallelizeassignrsid(sumstats, path, ref_mode="vcf",snpid="SNPID",rsid="rsI
|
|
|
381
400
|
assign rsID based on chr:pos
|
|
382
401
|
'''
|
|
383
402
|
if verbose: log.write("Start to annotate rsID based on chromosome and position information...{}".format(_get_version()))
|
|
384
|
-
|
|
403
|
+
check_dataframe_shape(sumstats, log, verbose)
|
|
385
404
|
if verbose: log.write(" -SNPID-rsID text file: "+ path)
|
|
386
405
|
|
|
387
406
|
standardized_normalized = sumstats["STATUS"].str.match("\w\w\w[0][01234][0126]\w", case=False, flags=0, na=False)
|
|
@@ -517,7 +536,7 @@ def parallelinferstrand(sumstats,ref_infer,ref_alt_freq=None,maf_threshold=0.40,
|
|
|
517
536
|
chr="CHR",pos="POS",ref="NEA",alt="EA",eaf="EAF",status="STATUS",
|
|
518
537
|
chr_dict=None,verbose=True,log=Log()):
|
|
519
538
|
if verbose: log.write("Start to infer strand for palindromic SNPs...{}".format(_get_version()))
|
|
520
|
-
|
|
539
|
+
check_dataframe_shape(sumstats, log, verbose)
|
|
521
540
|
if verbose: log.write(" -Reference vcf file:", ref_infer)
|
|
522
541
|
|
|
523
542
|
chr_dict = auto_check_vcf_chr_dict(ref_infer, chr_dict, verbose, log)
|
|
@@ -528,8 +547,8 @@ def parallelinferstrand(sumstats,ref_infer,ref_alt_freq=None,maf_threshold=0.40,
|
|
|
528
547
|
if "p" in mode:
|
|
529
548
|
# ref_alt_freq INFO in vcf was provided
|
|
530
549
|
if ref_alt_freq is not None:
|
|
550
|
+
|
|
531
551
|
if verbose: log.write(" -Alternative allele frequency in INFO:", ref_alt_freq)
|
|
532
|
-
|
|
533
552
|
## checking \w\w\w\w[0]\w\w -> standardized and normalized snp
|
|
534
553
|
good_chrpos = sumstats[status].str.match(r'\w\w\w[0][0]\w\w', case=False, flags=0, na=False)
|
|
535
554
|
palindromic = good_chrpos & is_palindromic(sumstats[[ref,alt]],a1=ref,a2=alt)
|
|
@@ -538,22 +557,28 @@ def parallelinferstrand(sumstats,ref_infer,ref_alt_freq=None,maf_threshold=0.40,
|
|
|
538
557
|
##not palindromic : change status
|
|
539
558
|
sumstats.loc[not_palindromic_snp,status] = vchange_status(sumstats.loc[not_palindromic_snp,status], 7 ,"9","0")
|
|
540
559
|
if verbose: log.write(" -Identified ", sum(palindromic)," palindromic SNPs...")
|
|
541
|
-
|
|
560
|
+
|
|
542
561
|
#palindromic but can not infer
|
|
543
|
-
maf_can_infer = (sumstats.loc[:,eaf] < maf_threshold) | (sumstats.loc[:,eaf] > 1 - maf_threshold)
|
|
562
|
+
maf_can_infer = (sumstats.loc[:,eaf] < maf_threshold) | (sumstats.loc[:,eaf] > 1 - maf_threshold)
|
|
563
|
+
|
|
544
564
|
sumstats.loc[palindromic&(~maf_can_infer),status] = vchange_status(sumstats.loc[palindromic&(~maf_can_infer),status],7,"9","7")
|
|
565
|
+
|
|
566
|
+
#palindromic WITH UNKNWON OR UNCHECKED STATUS
|
|
567
|
+
unknow_palindromic = sumstats[status].str.match(r'\w\w\w\w\w[012][89]', case=False, flags=0, na=False)
|
|
545
568
|
|
|
569
|
+
unknow_palindromic_to_check = palindromic & maf_can_infer & unknow_palindromic
|
|
570
|
+
|
|
571
|
+
if verbose: log.write(" -After filtering by MAF< {} , {} palindromic SNPs with unknown strand will be inferred...".format(maf_threshold, sum(unknow_palindromic_to_check)))
|
|
546
572
|
|
|
547
|
-
if verbose: log.write(" -After filtering by MAF< ", maf_threshold ," , the strand of ", sum(palindromic & maf_can_infer)," palindromic SNPs will be inferred...")
|
|
548
573
|
#########################################################################################
|
|
549
|
-
if sum(
|
|
550
|
-
if sum(
|
|
574
|
+
if sum(unknow_palindromic_to_check)>0:
|
|
575
|
+
if sum(unknow_palindromic_to_check)<10000:
|
|
551
576
|
n_cores=1
|
|
552
|
-
df_split = np.array_split(sumstats.loc[
|
|
577
|
+
df_split = np.array_split(sumstats.loc[unknow_palindromic_to_check,[chr,pos,ref,alt,eaf,status]], n_cores)
|
|
553
578
|
pool = Pool(n_cores)
|
|
554
579
|
map_func = partial(check_strand,chr=chr,pos=pos,ref=ref,alt=alt,eaf=eaf,status=status,ref_infer=ref_infer,ref_alt_freq=ref_alt_freq,chr_dict=chr_dict)
|
|
555
580
|
status_inferred = pd.concat(pool.map(map_func,df_split))
|
|
556
|
-
sumstats.loc[
|
|
581
|
+
sumstats.loc[unknow_palindromic_to_check,status] = status_inferred.values
|
|
557
582
|
pool.close()
|
|
558
583
|
pool.join()
|
|
559
584
|
#########################################################################################
|
|
@@ -650,7 +675,7 @@ def parallelinferstrand(sumstats,ref_infer,ref_alt_freq=None,maf_threshold=0.40,
|
|
|
650
675
|
def parallelecheckaf(sumstats,ref_infer,ref_alt_freq=None,maf_threshold=0.4,column_name="DAF",suffix="",n_cores=1, chr="CHR",pos="POS",ref="NEA",alt="EA",eaf="EAF",status="STATUS",chr_dict=None,force=False, verbose=True,log=Log()):
|
|
651
676
|
|
|
652
677
|
if verbose: log.write("Start to check the difference between EAF and reference vcf alt frequency ...{}".format(_get_version()))
|
|
653
|
-
|
|
678
|
+
check_dataframe_shape(sumstats, log, verbose)
|
|
654
679
|
if verbose: log.write(" -Reference vcf file:", ref_infer)
|
|
655
680
|
if verbose: log.write(" -CPU Cores to use :",n_cores)
|
|
656
681
|
|
|
@@ -718,7 +743,7 @@ def check_daf(chr,start,end,ref,alt,eaf,vcf_reader,alt_freq,chr_dict=None):
|
|
|
718
743
|
def paralleleinferaf(sumstats,ref_infer,ref_alt_freq=None,n_cores=1, chr="CHR",pos="POS",ref="NEA",alt="EA",eaf="EAF",status="STATUS",chr_dict=None,force=False, verbose=True,log=Log()):
|
|
719
744
|
|
|
720
745
|
if verbose: log.write("Start to infer the AF and reference vcf alt frequency ...{}".format(_get_version()))
|
|
721
|
-
|
|
746
|
+
check_dataframe_shape(sumstats, log, verbose)
|
|
722
747
|
if verbose: log.write(" -Reference vcf file:", ref_infer)
|
|
723
748
|
if verbose: log.write(" -CPU Cores to use :",n_cores)
|
|
724
749
|
|
|
@@ -8,6 +8,7 @@ from gwaslab.bd_common_data import get_format_dict
|
|
|
8
8
|
from gwaslab.qc_fix_sumstats import sortcolumn
|
|
9
9
|
from gwaslab.qc_fix_sumstats import _process_build
|
|
10
10
|
from gwaslab.qc_check_datatype import check_datatype
|
|
11
|
+
from gwaslab.qc_check_datatype import check_dataframe_memory_usage
|
|
11
12
|
|
|
12
13
|
#20221030
|
|
13
14
|
def preformat(sumstats,
|
|
@@ -353,6 +354,8 @@ def preformat(sumstats,
|
|
|
353
354
|
sumstats = sortcolumn(sumstats=sumstats,log=log,verbose=verbose)
|
|
354
355
|
check_datatype(sumstats,log=log,verbose=verbose)
|
|
355
356
|
gc.collect()
|
|
357
|
+
check_dataframe_memory_usage(sumstats,log=log,verbose=verbose)
|
|
358
|
+
|
|
356
359
|
if verbose: log.write("Finished loading data successfully!")
|
|
357
360
|
return sumstats
|
|
358
361
|
|
|
@@ -457,13 +460,18 @@ def process_neaf(sumstats,log,verbose):
|
|
|
457
460
|
return sumstats
|
|
458
461
|
|
|
459
462
|
def process_allele(sumstats,log,verbose):
|
|
463
|
+
|
|
460
464
|
if "EA" in sumstats.columns:
|
|
465
|
+
|
|
461
466
|
if "REF" in sumstats.columns and "ALT" in sumstats.columns:
|
|
467
|
+
|
|
462
468
|
if "NEA" not in sumstats.columns:
|
|
463
469
|
if verbose: log.write(" NEA not available: assigning REF to NEA...")
|
|
464
470
|
sumstats["NEA"]=sumstats["REF"]
|
|
471
|
+
|
|
465
472
|
if verbose: log.write(" -EA,REF and ALT columns are available: assigning NEA...")
|
|
466
473
|
ea_alt = sumstats["EA"]==sumstats["ALT"]
|
|
474
|
+
|
|
467
475
|
if verbose: log.write(" -For variants with EA == ALT : assigning REF to NEA ...")
|
|
468
476
|
sumstats.loc[ea_alt,"NEA"] = sumstats.loc[ea_alt,"REF"]
|
|
469
477
|
|
|
@@ -474,6 +482,7 @@ def process_allele(sumstats,log,verbose):
|
|
|
474
482
|
#sumstats = sumstats.drop(labels=["REF","ALT"],axis=1)
|
|
475
483
|
sumstats["REF"]=sumstats["REF"].astype("category")
|
|
476
484
|
sumstats["ALT"]=sumstats["ALT"].astype("category")
|
|
485
|
+
|
|
477
486
|
sumstats["EA"]=sumstats["EA"].astype("category")
|
|
478
487
|
if "NEA" in sumstats.columns:
|
|
479
488
|
sumstats["NEA"]=sumstats["NEA"].astype("category")
|
|
@@ -87,3 +87,17 @@ def verify_datatype(header, dtype):
|
|
|
87
87
|
return "F"
|
|
88
88
|
else:
|
|
89
89
|
return "NA"
|
|
90
|
+
|
|
91
|
+
def check_dataframe_shape(sumstats, log, verbose):
|
|
92
|
+
memory_in_mb = sumstats.memory_usage().sum()/1024/1024
|
|
93
|
+
try:
|
|
94
|
+
log.write(" -Current Dataframe shape : {} x {} ; Memory usage: {:.2f} MB".format(len(sumstats),len(sumstats.columns),memory_in_mb), verbose=verbose)
|
|
95
|
+
except:
|
|
96
|
+
log.write(" -WARNING! Error: cannot get Dataframe shape...", verbose=verbose)
|
|
97
|
+
|
|
98
|
+
def check_dataframe_memory_usage(sumstats, log, verbose):
|
|
99
|
+
memory_in_mb = sumstats.memory_usage().sum()/1024/1024
|
|
100
|
+
try:
|
|
101
|
+
log.write(" -Current Dataframe memory usage: {:.2f} MB".format(memory_in_mb), verbose=verbose)
|
|
102
|
+
except:
|
|
103
|
+
log.write(" -WARNING! Error: cannot get Memory usage...", verbose=verbose)
|