gwaslab 3.5.8__tar.gz → 3.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (124) hide show
  1. {gwaslab-3.5.8/src/gwaslab.egg-info → gwaslab-3.6.1}/PKG-INFO +43 -66
  2. {gwaslab-3.5.8 → gwaslab-3.6.1}/README.md +40 -64
  3. {gwaslab-3.5.8 → gwaslab-3.6.1}/pyproject.toml +4 -3
  4. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/data/reference.json +3 -1
  5. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_Sumstats.py +13 -2
  6. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_meta.py +2 -1
  7. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_version.py +2 -2
  8. gwaslab-3.6.1/src/gwaslab/util_ex_infer_ancestry.py +65 -0
  9. gwaslab-3.6.1/src/gwaslab/util_ex_run_magma.py +74 -0
  10. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_meta.py +0 -1
  11. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_meta_polars.py +0 -2
  12. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_aux_annotate_plot.py +13 -2
  13. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_mqqplot.py +10 -3
  14. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_qqplot.py +3 -1
  15. {gwaslab-3.5.8 → gwaslab-3.6.1/src/gwaslab.egg-info}/PKG-INFO +43 -66
  16. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab.egg-info/SOURCES.txt +2 -0
  17. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab.egg-info/requires.txt +1 -0
  18. {gwaslab-3.5.8 → gwaslab-3.6.1}/LICENSE +0 -0
  19. {gwaslab-3.5.8 → gwaslab-3.6.1}/LICENSE_before_v3.4.39 +0 -0
  20. {gwaslab-3.5.8 → gwaslab-3.6.1}/setup.cfg +0 -0
  21. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/__init__.py +0 -0
  22. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/bd_common_data.py +0 -0
  23. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/bd_config.py +0 -0
  24. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/bd_download.py +0 -0
  25. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/bd_get_hapmap3.py +0 -0
  26. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/cache_manager.py +0 -0
  27. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/data/chrx_par/chrx_par_hg19.bed.gz +0 -0
  28. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/data/chrx_par/chrx_par_hg38.bed.gz +0 -0
  29. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/data/formatbook.json +0 -0
  30. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz +0 -0
  31. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz +0 -0
  32. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz +0 -0
  33. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz +0 -0
  34. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_Log.py +0 -0
  35. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_Phenotypes.py +0 -0
  36. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_SumstatsMulti.py +0 -0
  37. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_SumstatsPair.py +0 -0
  38. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_SumstatsSet.py +0 -0
  39. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_SumstatsT.py +0 -0
  40. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_Sumstats_polars.py +0 -0
  41. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_Sumstats_summary.py +0 -0
  42. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_headers.py +0 -0
  43. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_meta_update.py +0 -0
  44. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_vchange_status.py +0 -0
  45. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/g_vchange_status_polars.py +0 -0
  46. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/hm_casting.py +0 -0
  47. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/hm_casting_polars.py +0 -0
  48. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/hm_harmonize_sumstats.py +0 -0
  49. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/hm_rsid_to_chrpos.py +0 -0
  50. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/io_load_ld.py +0 -0
  51. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/io_preformat_input.py +0 -0
  52. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/io_preformat_input_polars.py +0 -0
  53. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/io_process_args.py +0 -0
  54. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/io_read_ldsc.py +0 -0
  55. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/io_read_pipcs.py +0 -0
  56. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/io_read_tabular.py +0 -0
  57. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/io_to_formats.py +0 -0
  58. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/io_to_pickle.py +0 -0
  59. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/ldsc_irwls.py +0 -0
  60. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/ldsc_jackknife.py +0 -0
  61. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/ldsc_ldscore.py +0 -0
  62. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/ldsc_parse.py +0 -0
  63. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/ldsc_regressions.py +0 -0
  64. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/ldsc_sumstats.py +0 -0
  65. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/prscs_gigrnd.py +0 -0
  66. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/prscs_mcmc_gtb.py +0 -0
  67. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/prscs_parse_genet.py +0 -0
  68. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/qc_build.py +0 -0
  69. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/qc_check_datatype.py +0 -0
  70. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/qc_check_datatype_polars.py +0 -0
  71. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/qc_fix_sumstats.py +0 -0
  72. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/qc_fix_sumstats_polars.py +0 -0
  73. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/run_script.py +0 -0
  74. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_abf_finemapping.py +0 -0
  75. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_calculate_ldmatrix.py +0 -0
  76. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_calculate_prs.py +0 -0
  77. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_gwascatalog.py +0 -0
  78. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_ldproxyfinder.py +0 -0
  79. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_ldsc.py +0 -0
  80. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_match_ldmatrix.py +0 -0
  81. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_plink_filter.py +0 -0
  82. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_process_h5.py +0 -0
  83. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_process_ref.py +0 -0
  84. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_run_2samplemr.py +0 -0
  85. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_run_ccgwas.py +0 -0
  86. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_run_clumping.py +0 -0
  87. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_run_coloc.py +0 -0
  88. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_run_hyprcoloc.py +0 -0
  89. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_run_mesusie.py +0 -0
  90. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_run_mtag.py +0 -0
  91. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_run_prscs.py +0 -0
  92. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_ex_run_susie.py +0 -0
  93. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_calculate_gc.py +0 -0
  94. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_calculate_power.py +0 -0
  95. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_convert_h2.py +0 -0
  96. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_correct_winnerscurse.py +0 -0
  97. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_estimate_ess.py +0 -0
  98. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_fill_data.py +0 -0
  99. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_filter_value.py +0 -0
  100. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_get_density.py +0 -0
  101. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_get_sig.py +0 -0
  102. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_merge.py +0 -0
  103. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/util_in_snphwe.py +0 -0
  104. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_aux_chromatin.py +0 -0
  105. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_aux_property.py +0 -0
  106. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_aux_quickfix.py +0 -0
  107. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_aux_reposition_text.py +0 -0
  108. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_aux_save_figure.py +0 -0
  109. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_compare_af.py +0 -0
  110. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_compare_effect.py +0 -0
  111. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_credible_sets.py +0 -0
  112. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_effect.py +0 -0
  113. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_forestplot.py +0 -0
  114. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_miamiplot.py +0 -0
  115. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_miamiplot2.py +0 -0
  116. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_phe_heatmap.py +0 -0
  117. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_regional2.py +0 -0
  118. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_regionalplot.py +0 -0
  119. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_rg_heatmap.py +0 -0
  120. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_scatter_with_reg.py +0 -0
  121. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_stackedregional.py +0 -0
  122. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab/viz_plot_trumpetplot.py +0 -0
  123. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab.egg-info/dependency_links.txt +0 -0
  124. {gwaslab-3.5.8 → gwaslab-3.6.1}/src/gwaslab.egg-info/top_level.txt +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gwaslab
3
- Version: 3.5.8
3
+ Version: 3.6.1
4
4
  Summary: A collection of handy tools for GWAS SumStats
5
5
  Author-email: Yunye <yunye@gwaslab.com>
6
6
  Project-URL: Homepage, https://cloufield.github.io/gwaslab/
7
7
  Project-URL: Github, https://github.com/Cloufield/gwaslab
8
8
  Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
10
10
  Classifier: Operating System :: OS Independent
11
11
  Requires-Python: <3.13,>=3.9
12
12
  Description-Content-Type: text/markdown
@@ -25,6 +25,7 @@ Requires-Dist: scikit-allel>=1.3.5
25
25
  Requires-Dist: pyensembl==2.2.3
26
26
  Requires-Dist: gtfparse==1.3.0
27
27
  Requires-Dist: h5py>=3.10.0
28
+ Requires-Dist: pyarrow
28
29
  Dynamic: license-file
29
30
 
30
31
  # GWASLab
@@ -36,43 +37,63 @@ Dynamic: license-file
36
37
  ![badge_pip](https://img.shields.io/pypi/dm/gwaslab)
37
38
  ![badge_commit_m](https://img.shields.io/github/commit-activity/m/Cloufield/gwaslab)
38
39
 
39
- * A handy Python toolkit for handling GWAS summary statistics (sumstats).
40
+ * A handy Python-based toolkit for handling GWAS summary statistics (sumstats).
40
41
  * Each process is modularized and can be customized to your needs.
41
42
  * Sumstats-specific manipulations are designed as methods of a Python object, `gwaslab.Sumstats`.
42
43
 
43
- Please check GWASLab documentation at [https://cloufield.github.io/gwaslab/](https://cloufield.github.io/gwaslab/)
44
+ ## Installation
44
45
 
45
- Note: GWASLab is being updated very frequently for now. We will release the first stable version soon! Please stay tuned.
46
+ ### install via pip
46
47
 
47
- Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/gwaslab/KnownIssues/](https://cloufield.github.io/gwaslab/KnownIssues/) .
48
+ The latest version of GWASLab now supports Python 3.9, 3.10, 3.11, and 3.12.
48
49
 
49
- ## Install
50
+ ```bash
51
+ pip install gwaslab
52
+ ```
50
53
 
51
- ### install via pip
54
+ ### install in conda environment
52
55
 
53
- The latest version of GWASLab now supports Python 3.9, 3.10, 3.11, and 3.12.
56
+ Create a Python 3.9, 3.10, 3.11 or 3.12 environment and install gwaslab using pip:
57
+
58
+ ```bash
59
+ conda env create -n gwaslab -c conda-forge python=3.12
60
+
61
+ conda activate gwaslab
54
62
 
63
+ pip install gwaslab
55
64
  ```
56
- pip install gwaslab==3.5.7
65
+
66
+ or create a new environment using yml file [environment.yml](https://github.com/Cloufield/gwaslab/blob/main/environment.yml)
67
+
68
+ ```bash
69
+ conda env create -n gwaslab -f environment.yml
57
70
  ```
58
71
 
72
+ ### install using docker (deprecated)
73
+
74
+ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main/docker/Dockerfile) for building local images.
75
+
76
+ ## Quick start
77
+
59
78
  ```python
79
+
60
80
  import gwaslab as gl
81
+
61
82
  # load plink2 output
62
- mysumstats = gl.Sumstats("t2d_bbj.txt.gz", fmt="plink2")
83
+ mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="plink2")
63
84
 
64
- # load sumstats with auto mode (auto-detecting common headers)
85
+ # or load sumstats with auto mode (auto-detecting commonly used headers)
65
86
  # assuming ALT/A1 is EA, and frq is EAF
66
- mysumstats = gl.Sumstats("t2d_bbj.txt.gz", fmt="auto")
87
+ mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="auto")
67
88
 
68
89
  # or you can specify the columns:
69
- mysumstats = gl.Sumstats("t2d_bbj.txt.gz",
90
+ mysumstats = gl.Sumstats("sumstats.txt.gz",
70
91
  snpid="SNP",
71
92
  chrom="CHR",
72
93
  pos="POS",
73
94
  ea="ALT",
74
95
  nea="REF",
75
- neaf="Frq",
96
+ eaf="Frq",
76
97
  beta="BETA",
77
98
  se="SE",
78
99
  p="P",
@@ -85,26 +106,9 @@ mysumstats.plot_mqq()
85
106
  ...
86
107
  ```
87
108
 
88
- ### install in conda environment
89
-
90
- Create a Python 3.9 environment and install gwaslab using pip:
91
-
92
- ```
93
- conda env create -n gwaslab_test -c conda-forge python=3.9
94
- conda activate gwaslab
95
- pip install gwaslab==3.4.45
96
- ```
97
-
98
- or create a new environment using yml file [environment_3.4.40.yml](https://github.com/Cloufield/gwaslab/blob/main/environment_3.4.40.yml)
99
-
100
- ```
101
- conda env create -n gwaslab -f environment_3.4.40.yml
102
- ```
103
-
104
-
105
- ### install using docker
109
+ ## Documentation and tutorials
106
110
 
107
- A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main/docker/Dockerfile) for building local images.
111
+ Documentation and tutorials for GWASLab are available [here](https://cloufield.github.io/gwaslab/).
108
112
 
109
113
  ## Functions
110
114
 
@@ -151,7 +155,7 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
151
155
  - Scatter plot: allele frequency comparison
152
156
  - Scatter plot: trumpet plot (plot of MAF and effect size with power lines)
153
157
 
154
- ### Visualization Examples
158
+ #### Visualization Examples
155
159
 
156
160
  <img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/233836639-34b03c47-5a59-4fd4-9677-5e13b02aab15.png">
157
161
  <img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/197393168-e3e7076f-2801-4d66-9526-80778d44f3da.png">
@@ -167,42 +171,15 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
167
171
  - Sumstats summary: give you a quick overview of the sumstats.
168
172
  - ...
169
173
 
170
- ## Requirements (deprecated)
171
-
172
- environment.yml
174
+ ## Issues
173
175
 
174
- ```
175
- name: gwaslab
176
- channels:
177
- - conda-forge
178
- - defaults
179
- dependencies:
180
- - python=3.8.16=h7a1cb2a_3
181
- - jupyter==1.0.0
182
- - pip==23.1.2
183
- - pip:
184
- - adjusttext==0.8
185
- - biopython==1.81
186
- - gwaslab==3.4.16
187
- - liftover==1.1.16
188
- - matplotlib==3.7.1
189
- - numpy==1.24.2
190
- - pandas==1.4.4
191
- - scikit-allel==1.3.5
192
- - scikit-learn==1.2.2
193
- - scipy==1.10.1
194
- - seaborn==0.11.2
195
- - statsmodels==0.13
196
- - adjustText==0.8
197
- - pysam==0.19
198
- - pyensembl==2.2.3
199
- - h5py==3.10.0
200
- ```
176
+ - GWASLab is currently under active development, with frequent updates.
177
+ - Note: Known issues are documented at https://cloufield.github.io/gwaslab/KnownIssues/.
201
178
 
202
179
  ## How to cite
203
180
  - GWASLab preprint: He, Y., Koido, M., Shimmori, Y., Kamatani, Y. (2023). GWASLab: a Python package for processing and visualizing GWAS summary statistics. Preprint at Jxiv, 2023-5. https://doi.org/10.51094/jxiv.370
204
181
 
205
- ## Sample Data
182
+ ## Sample data used for tutorial
206
183
  - Sample GWAS data used in GWASLab is obtained from: http://jenger.riken.jp/ (Suzuki, Ken, et al. "Identification of 28 new susceptibility loci for type 2 diabetes in the Japanese population." Nature genetics 51.3 (2019): 379-386.).
207
184
 
208
185
  ## Acknowledgement
@@ -7,43 +7,63 @@
7
7
  ![badge_pip](https://img.shields.io/pypi/dm/gwaslab)
8
8
  ![badge_commit_m](https://img.shields.io/github/commit-activity/m/Cloufield/gwaslab)
9
9
 
10
- * A handy Python toolkit for handling GWAS summary statistics (sumstats).
10
+ * A handy Python-based toolkit for handling GWAS summary statistics (sumstats).
11
11
  * Each process is modularized and can be customized to your needs.
12
12
  * Sumstats-specific manipulations are designed as methods of a Python object, `gwaslab.Sumstats`.
13
13
 
14
- Please check GWASLab documentation at [https://cloufield.github.io/gwaslab/](https://cloufield.github.io/gwaslab/)
14
+ ## Installation
15
15
 
16
- Note: GWASLab is being updated very frequently for now. We will release the first stable version soon! Please stay tuned.
16
+ ### install via pip
17
17
 
18
- Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/gwaslab/KnownIssues/](https://cloufield.github.io/gwaslab/KnownIssues/) .
18
+ The latest version of GWASLab now supports Python 3.9, 3.10, 3.11, and 3.12.
19
19
 
20
- ## Install
20
+ ```bash
21
+ pip install gwaslab
22
+ ```
21
23
 
22
- ### install via pip
24
+ ### install in conda environment
23
25
 
24
- The latest version of GWASLab now supports Python 3.9, 3.10, 3.11, and 3.12.
26
+ Create a Python 3.9, 3.10, 3.11 or 3.12 environment and install gwaslab using pip:
27
+
28
+ ```bash
29
+ conda env create -n gwaslab -c conda-forge python=3.12
30
+
31
+ conda activate gwaslab
25
32
 
33
+ pip install gwaslab
26
34
  ```
27
- pip install gwaslab==3.5.7
35
+
36
+ or create a new environment using yml file [environment.yml](https://github.com/Cloufield/gwaslab/blob/main/environment.yml)
37
+
38
+ ```bash
39
+ conda env create -n gwaslab -f environment.yml
28
40
  ```
29
41
 
42
+ ### install using docker (deprecated)
43
+
44
+ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main/docker/Dockerfile) for building local images.
45
+
46
+ ## Quick start
47
+
30
48
  ```python
49
+
31
50
  import gwaslab as gl
51
+
32
52
  # load plink2 output
33
- mysumstats = gl.Sumstats("t2d_bbj.txt.gz", fmt="plink2")
53
+ mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="plink2")
34
54
 
35
- # load sumstats with auto mode (auto-detecting common headers)
55
+ # or load sumstats with auto mode (auto-detecting commonly used headers)
36
56
  # assuming ALT/A1 is EA, and frq is EAF
37
- mysumstats = gl.Sumstats("t2d_bbj.txt.gz", fmt="auto")
57
+ mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="auto")
38
58
 
39
59
  # or you can specify the columns:
40
- mysumstats = gl.Sumstats("t2d_bbj.txt.gz",
60
+ mysumstats = gl.Sumstats("sumstats.txt.gz",
41
61
  snpid="SNP",
42
62
  chrom="CHR",
43
63
  pos="POS",
44
64
  ea="ALT",
45
65
  nea="REF",
46
- neaf="Frq",
66
+ eaf="Frq",
47
67
  beta="BETA",
48
68
  se="SE",
49
69
  p="P",
@@ -56,26 +76,9 @@ mysumstats.plot_mqq()
56
76
  ...
57
77
  ```
58
78
 
59
- ### install in conda environment
60
-
61
- Create a Python 3.9 environment and install gwaslab using pip:
62
-
63
- ```
64
- conda env create -n gwaslab_test -c conda-forge python=3.9
65
- conda activate gwaslab
66
- pip install gwaslab==3.4.45
67
- ```
68
-
69
- or create a new environment using yml file [environment_3.4.40.yml](https://github.com/Cloufield/gwaslab/blob/main/environment_3.4.40.yml)
70
-
71
- ```
72
- conda env create -n gwaslab -f environment_3.4.40.yml
73
- ```
74
-
75
-
76
- ### install using docker
79
+ ## Documentation and tutorials
77
80
 
78
- A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main/docker/Dockerfile) for building local images.
81
+ Documentation and tutorials for GWASLab are available [here](https://cloufield.github.io/gwaslab/).
79
82
 
80
83
  ## Functions
81
84
 
@@ -122,7 +125,7 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
122
125
  - Scatter plot: allele frequency comparison
123
126
  - Scatter plot: trumpet plot (plot of MAF and effect size with power lines)
124
127
 
125
- ### Visualization Examples
128
+ #### Visualization Examples
126
129
 
127
130
  <img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/233836639-34b03c47-5a59-4fd4-9677-5e13b02aab15.png">
128
131
  <img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/197393168-e3e7076f-2801-4d66-9526-80778d44f3da.png">
@@ -138,42 +141,15 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
138
141
  - Sumstats summary: give you a quick overview of the sumstats.
139
142
  - ...
140
143
 
141
- ## Requirements (deprecated)
142
-
143
- environment.yml
144
+ ## Issues
144
145
 
145
- ```
146
- name: gwaslab
147
- channels:
148
- - conda-forge
149
- - defaults
150
- dependencies:
151
- - python=3.8.16=h7a1cb2a_3
152
- - jupyter==1.0.0
153
- - pip==23.1.2
154
- - pip:
155
- - adjusttext==0.8
156
- - biopython==1.81
157
- - gwaslab==3.4.16
158
- - liftover==1.1.16
159
- - matplotlib==3.7.1
160
- - numpy==1.24.2
161
- - pandas==1.4.4
162
- - scikit-allel==1.3.5
163
- - scikit-learn==1.2.2
164
- - scipy==1.10.1
165
- - seaborn==0.11.2
166
- - statsmodels==0.13
167
- - adjustText==0.8
168
- - pysam==0.19
169
- - pyensembl==2.2.3
170
- - h5py==3.10.0
171
- ```
146
+ - GWASLab is currently under active development, with frequent updates.
147
+ - Note: Known issues are documented at https://cloufield.github.io/gwaslab/KnownIssues/.
172
148
 
173
149
  ## How to cite
174
150
  - GWASLab preprint: He, Y., Koido, M., Shimmori, Y., Kamatani, Y. (2023). GWASLab: a Python package for processing and visualizing GWAS summary statistics. Preprint at Jxiv, 2023-5. https://doi.org/10.51094/jxiv.370
175
151
 
176
- ## Sample Data
152
+ ## Sample data used for tutorial
177
153
  - Sample GWAS data used in GWASLab is obtained from: http://jenger.riken.jp/ (Suzuki, Ken, et al. "Identification of 28 new susceptibility loci for type 2 diabetes in the Japanese population." Nature genetics 51.3 (2019): 379-386.).
178
154
 
179
155
  ## Acknowledgement
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
7
7
 
8
8
  [project]
9
9
  name = "gwaslab"
10
- version = "3.5.8"
10
+ version = "3.6.1"
11
11
  authors = [
12
12
  { name="Yunye", email="yunye@gwaslab.com" },
13
13
  ]
@@ -28,13 +28,14 @@ dependencies = [
28
28
  "scikit-allel>=1.3.5",
29
29
  "pyensembl==2.2.3",
30
30
  "gtfparse==1.3.0",
31
- "h5py>=3.10.0"
31
+ "h5py>=3.10.0",
32
+ "pyarrow"
32
33
  ]
33
34
 
34
35
  requires-python = ">=3.9,<3.13"
35
36
  classifiers = [
36
37
  "Programming Language :: Python :: 3",
37
- "License :: OSI Approved :: MIT License",
38
+ "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
38
39
  "Operating System :: OS Independent",
39
40
  ]
40
41
 
@@ -103,7 +103,9 @@
103
103
  "13to19":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/chm13v2-hg19.chain",
104
104
  "13to38":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/chm13v2-grch38.chain",
105
105
  "18to19":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz",
106
- "18to38":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg38.over.chain.gz"
106
+ "18to38":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg38.over.chain.gz",
107
+ "1kg_hm3_hg38_eaf":"https://www.dropbox.com/scl/fi/ymkqfsaec6mwjzlvxsm45/PAN.hapmap3.hg38.EAF.tsv.gz?rlkey=p1auef5y1kk7ui41k6j3s8b0z&dl=1",
108
+ "1kg_hm3_hg19_eaf":"https://www.dropbox.com/scl/fi/dmv9wtfchv6ahim86d49r/PAN.hapmap3.hg19.EAF.tsv.gz?rlkey=ywne2gj1rlm2nj42q9lt2d99n&dl=1"
107
109
  }
108
110
 
109
111
 
@@ -84,6 +84,8 @@ from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
84
84
  from gwaslab.util_ex_ldsc import _estimate_h2_cts_by_ldsc
85
85
  from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
86
86
  from gwaslab.util_ex_ldproxyfinder import _extract_ld_proxy
87
+ from gwaslab.util_ex_run_magma import _run_magma
88
+ from gwaslab.util_ex_infer_ancestry import _infer_ancestry
87
89
  from gwaslab.bd_get_hapmap3 import gethapmap3
88
90
  from gwaslab.util_abf_finemapping import abf_finemapping
89
91
  from gwaslab.util_abf_finemapping import make_cs
@@ -674,6 +676,9 @@ class Sumstats():
674
676
  fig,outliers = plotdaf(self.data, **kwargs)
675
677
  return fig, outliers
676
678
 
679
def infer_ancestry(self, **kwargs):
    """Infer the ancestry of this sumstats and record it in the metadata.

    All keyword arguments are forwarded to ``_infer_ancestry``. The result is
    stored under ``self.meta["gwaslab"]["inferred_ancestry"]``; nothing is
    returned.
    """
    # Route messages to this object's log unless the caller supplied one,
    # matching how the other wrappers in this class pass ``log=self.log``.
    kwargs.setdefault("log", self.log)
    self.meta["gwaslab"]["inferred_ancestry"] = _infer_ancestry(self.data, **kwargs)
681
+
677
682
  def plot_gwheatmap(self, **kwargs):
678
683
  fig = _gwheatmap(self.data, **kwargs)
679
684
  return fig
@@ -882,8 +887,14 @@ class Sumstats():
882
887
  if build is None:
883
888
  build = self.meta["gwaslab"]["genome_build"]
884
889
  insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
885
- _run_prscs(sst_file = insumstats[["rsID","CHR","POS","EA","NEA","BETA","SE"]], log=self.log, **kwargs)
886
-
890
+ _run_prscs(sst_file = insumstats[["rsID","CHR","POS","EA","NEA","BETA","SE"]],
891
+ log=self.log,
892
+ **kwargs)
893
+
894
def run_magma(self, build=None, verbose=True, **kwargs):
    """Run MAGMA on this sumstats via ``_run_magma``.

    Parameters
    ----------
    build : str or None
        Genome build forwarded to ``_run_magma``. When None, the build
        recorded in ``self.meta["gwaslab"]["genome_build"]`` is used.
    verbose : bool
        Forwarded to ``_run_magma`` to control log output.
    **kwargs
        Any other ``_run_magma`` option (e.g. ``ref``, ``ncbi``, ``out``).
    """
    # Fall back to the recorded genome build, as the sibling methods do;
    # forwarding None would override the callee's own default.
    if build is None:
        build = self.meta["gwaslab"]["genome_build"]
    _run_magma(self.data,
               study=self.meta["gwaslab"]["study_name"],
               build=build,
               verbose=verbose,
               log=self.log,
               **kwargs)
887
898
  ## LDSC ##############################################################################################
888
899
  def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right", **kwargs):
889
900
  if build is None:
@@ -6,7 +6,7 @@ def _init_meta(object="Sumstats"):
6
6
  "gwas_id":"Unknown",
7
7
  "samples":{
8
8
  "sample_size":"Unknown",
9
- "sample_ancestry":"European",
9
+ "sample_ancestry":"Unknown",
10
10
  "ancestry_method":"self-reported|genetically determined",
11
11
  } ,
12
12
  "trait_description":"Unknown",
@@ -37,6 +37,7 @@ def _init_meta(object="Sumstats"):
37
37
  "species":"homo sapiens",
38
38
  "genome_build":"99",
39
39
  "sample_prevalence":"Unknown",
40
+ "inferred_ancestry":"Unknown",
40
41
  "population_prevalence":"Unknown",
41
42
  "variants":{
42
43
  "variant_number":"Unknown",
@@ -15,8 +15,8 @@ def _get_version():
15
15
  def gwaslab_info():
16
16
  # version meta information
17
17
  dic={
18
- "version":"3.5.8",
19
- "release_date":"20250424"
18
+ "version":"3.6.1",
19
+ "release_date":"20250506"
20
20
  }
21
21
  return dic
22
22
 
@@ -0,0 +1,65 @@
1
+
2
+ import pandas as pd
3
+ from gwaslab.g_Log import Log
4
+
5
def _infer_ancestry(sumstats,
                    ancestry_af=None,
                    build="19",
                    log=Log(),
                    verbose=True):
    """Return the reference population whose allele frequencies are closest (lowest mean Fst).

    Parameters
    ----------
    sumstats : pandas.DataFrame
        Must contain the columns CHR, POS, EA, NEA and EAF.
    ancestry_af : str or path-like
        Tab-separated reference table read with ``pandas.read_csv``. It is
        joined on CHR and POS and must provide an ALT column plus one
        frequency column per population label listed below.
    build : str
        Currently unused by this function.
    log : Log
        Logger used for progress messages.
    verbose : bool
        Passed to ``log.write``.

    Returns
    -------
    str
        Population label (e.g. ``"EUR"``) with the smallest mean Fst.

    Raises
    ------
    ValueError
        If ``ancestry_af`` is None or no variant overlaps the reference.
    """
    if ancestry_af is None:
        raise ValueError("ancestry_af is required: path to the reference allele frequency table.")

    log.write("Start to infer ancestry based on Fst...", verbose=verbose)
    ref_af = pd.read_csv(ancestry_af, sep="\t")

    data_af = pd.merge(sumstats[["CHR","POS","EA","NEA","EAF"]], ref_af, on=["CHR","POS"], how="inner")

    log.write(f" -Estimating Fst using {len(data_af)} variants...", verbose=verbose)
    if data_af.empty:
        raise ValueError("No variants overlap between the sumstats and the reference allele frequency table.")

    # NOTE(review): rows are flipped when EA equals ALT; confirm this matches
    # the allele the reference frequency columns refer to. Alleles are not
    # otherwise checked against the reference (the join is on CHR/POS only).
    is_flip = data_af["EA"] == data_af["ALT"]
    # NOTE: pandas aligns the right-hand DataFrame on column labels, so this
    # assignment does not actually exchange EA and NEA. It is left untouched
    # because neither column is read again below.
    data_af.loc[is_flip, ["EA","NEA"]] = data_af.loc[is_flip, ["NEA","EA"]]
    data_af.loc[is_flip, "EAF"] = 1 - data_af.loc[is_flip, "EAF"]

    populations = ('GBR', 'FIN', 'CHS', 'PUR', 'CDX',
                   'CLM', 'IBS', 'PEL', 'PJL', 'KHV', 'ACB', 'GWD', 'ESN', 'BEB', 'MSL',
                   'STU', 'ITU', 'CEU', 'YRI', 'CHB', 'JPT', 'LWK', 'ASW', 'MXL', 'TSI',
                   'GIH', 'EUR', 'EAS', 'AMR', 'SAS', 'AFR')
    headers = [f"FST_{pop}" for pop in populations]

    # Column-wise computation instead of a Python-level call per row and
    # population. Casting to float64 keeps the arithmetic in double precision,
    # and 0/0 (both frequencies 0 or both 1) yields NaN, which mean() skips.
    eaf = data_af["EAF"].astype("float64")
    for pop, header in zip(populations, headers):
        data_af[header] = calculate_fst(eaf, data_af[pop].astype("float64"))

    mean_fst = data_af[headers].mean().sort_values()
    for header, value in mean_fst.items():
        log.write(f" -{header} : {value}", verbose=verbose)

    if mean_fst.isna().all():
        raise ValueError("Fst could not be estimated for any reference population.")

    # Column labels look like "FST_<POP>"; keep only the population part.
    closest_ancestry = mean_fst.idxmin().split("_")[1]

    log.write(f" -Closest Ancestry: {closest_ancestry}", verbose=verbose)
    log.write("Finished inferring ancestry.", verbose=verbose)
    return closest_ancestry
37
+
38
def calculate_fst(p_1, p_2):
    """Return Fst between two populations from one allele's frequency in each.

    Fst is computed as ``(Ht - Hs) / Ht``, where ``Hs`` is the mean expected
    heterozygosity of the two populations and ``Ht`` is the expected
    heterozygosity at the averaged allele frequency.

    Parameters
    ----------
    p_1, p_2 : float or array-like
        Allele frequencies in population 1 and population 2. Only arithmetic
        operators are applied, so objects supporting element-wise arithmetic
        (e.g. pandas Series) also work.

    Returns
    -------
    float or array-like
        Fst. For plain Python numbers NaN is returned when ``Ht`` is zero
        (both frequencies 0, or both 1) instead of raising ZeroDivisionError.
    """
    # https://bios1140.github.io/understanding-fst-the-fixation-index.html
    q_1 = 1 - p_1
    q_2 = 1 - p_2

    # allele frequencies of the pooled (meta)population
    p_t = (p_1 + p_2)/2
    q_t = 1 - p_t

    # mean expected heterozygosity within the two populations
    hs_1 = 2*p_1*q_1
    hs_2 = 2*p_2*q_2
    hs = (hs_1 + hs_2)/2

    # expected heterozygosity of the metapopulation
    ht = 2*p_t*q_t

    try:
        fst = (ht - hs)/ht
    except ZeroDivisionError:
        # The variant is monomorphic in both populations, so Fst is undefined.
        fst = float("nan")

    return fst
65
+
@@ -0,0 +1,74 @@
1
+ import subprocess
2
+ import os
3
+ import gc
4
+ import pandas as pd
5
+ import numpy as np
6
+ from gwaslab.g_Log import Log
7
+ from gwaslab.util_in_filter_value import _exclude_hla
8
+
9
def _run_magma(sumstats,
               magma="magma",
               study="Study1",
               exclude_hla=True,
               window="35,10",
               id_to_use="rsID",
               ref=None,
               ncbi=None,
               set_annot=None,
               out="./",
               delete=True,
               ncol="N",
               build="19",
               log=Log(),
               verbose=True):
    """Write MAGMA input files from ``sumstats`` and run MAGMA through the shell.

    Steps: optionally drop HLA variants with ``_exclude_hla``, write a
    SNP-location file and a p-value file, run ``--annotate`` and the gene
    analysis, then optionally run gene-set analysis.

    Parameters
    ----------
    sumstats : pandas.DataFrame
        Must contain ``id_to_use``, CHR, POS, P and the ``ncol`` column.
    magma : str
        MAGMA executable (name or path).
    study : str
        Name used for the temp files and as the MAGMA ``--out`` prefix.
    exclude_hla : bool
        If true, pass the data through ``_exclude_hla`` first.
    window : str
        Value for ``--annotate window=``.
    id_to_use : str
        Column written as ``SNP`` in both temp files.
    ref : str
        Value for ``--bfile``.
    ncbi : str
        Value for ``--gene-loc``.
    set_annot : str or None
        If given, additionally run ``--set-annot`` on the gene results.
    out : str
        Prefix (directory) for the temp input files.
    delete : bool
        If true, remove the temp input files after the run.
    ncol : str
        Sample-size column; written to the p-value file and passed as ``ncol=``.
    build : str
        Genome build forwarded to ``_exclude_hla``.
    log : Log
        Logger for progress and error messages.
    verbose : bool
        Passed to ``log.write``.

    Raises
    ------
    ValueError
        If ``ref`` or ``ncbi`` is None.
    """
    log.write(" Start to run magma from command line:", verbose=verbose)

    if ref is None or ncbi is None:
        # Otherwise the literal text "None" would be handed to magma.
        raise ValueError("Both ref (--bfile) and ncbi (--gene-loc) are required to run magma.")

    if exclude_hla:
        sumstats = _exclude_hla(sumstats, build=build)

    snploc = "{}{}.rsid.chr.pos.tsv".format(out, study)
    pval = "{}{}.rsid.p.n.tsv".format(out, study)

    # Only require the columns that are actually written, so a missing value
    # in an unrelated column does not remove the variant.
    usable = sumstats.dropna(subset=[id_to_use, "CHR", "POS", "P", ncol])

    log.write(f" -writing temp file for --snp-loc:{snploc}", verbose=verbose)
    usable[[id_to_use, "CHR", "POS"]].rename(columns={id_to_use: "SNP"}).to_csv(snploc, index=None, sep="\t")

    log.write(f" -writing temp file for --pval:{pval}", verbose=verbose)
    # The sample-size column keeps its own name so that ``ncol={ncol}`` below
    # refers to a column that exists in the file.
    usable[[id_to_use, "P", ncol]].rename(columns={id_to_use: "SNP"}).to_csv(pval, index=None, sep="\t")

    log.write(f" --annotate window: {window}", verbose=verbose)
    log.write(f" --gene-loc: {ncbi}", verbose=verbose)
    log.write(f" --bfile: {ref}", verbose=verbose)
    log.write(f" Output prefix: {out}", verbose=verbose)

    # NOTE(review): MAGMA results are written with prefix ``study`` (relative
    # to the working directory), not ``out`` + ``study`` - confirm intent.
    # NOTE: the script is interpreted by the shell, so paths containing spaces
    # or shell metacharacters are not protected.
    # ``set -e`` stops at the first failing step; without it only the exit
    # status of the last command would be seen.
    steps = [
        "set -e",
        f"{magma} --annotate window={window} --snp-loc {snploc} --gene-loc {ncbi} --out {study}",
        f"{magma} --bfile {ref} --pval {pval} ncol={ncol} --gene-annot {study}.genes.annot --out {study}",
    ]
    if set_annot is not None:
        steps.append(f"{magma} --gene-results {study}.genes.raw --set-annot {set_annot} --out {study}")
    bash_script = "\n".join(steps) + "\n"

    log.write(f"Script: {bash_script}")

    try:
        log.write(" Running magma from command line...", verbose=verbose)
        subprocess.check_output(bash_script, stderr=subprocess.STDOUT, shell=True, text=True)
    except subprocess.CalledProcessError as e:
        log.warning("ERROR!")
        log.write(e.output)
    finally:
        # Clean up the temp inputs whether or not magma succeeded.
        if delete:
            for tmp_path in (snploc, pval):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

    log.write("Finished running magma.", verbose=verbose)
74
+
@@ -7,7 +7,6 @@ from gwaslab.g_Log import Log
7
7
  from gwaslab.io_to_pickle import load_data_from_pickle
8
8
  from gwaslab.g_Sumstats import Sumstats
9
9
  import gc
10
- import statsmodels.api as sm
11
10
 
12
11
  def meta_analyze(sumstats_list,
13
12
  random_effects=False,
@@ -6,9 +6,7 @@ from scipy.stats import norm
6
6
  from gwaslab.g_Log import Log
7
7
  from gwaslab.io_to_pickle import load_data_from_pickle
8
8
  from gwaslab.g_Sumstats import Sumstats
9
- import gc
10
9
  import polars as pl
11
- import statsmodels.api as sm
12
10
  ########################################################################################################################################################################################################################################################################################################################################################
13
11
  ########################################################################################################################################################################################################################################################################################################################################################
14
12
  ########################################################################################################################################################################################################################################################################################################################################################
@@ -26,6 +26,7 @@ def annotate_single(
26
26
  anno_alias,
27
27
  anno_style,
28
28
  anno_args,
29
+ anno_args_single,
29
30
  arm_scale,
30
31
  anno_max_iter,
31
32
  arm_scale_d,
@@ -216,13 +217,23 @@ def annotate_single(
216
217
  if anno_style == "tight" :
217
218
  anno_default["rotation"] = 90
218
219
  ################################################################################################################################
219
-
220
+ # anno args for all
220
221
  for key,value in anno_args.items():
221
222
  anno_default[key]=value
222
- if len(highlight_i) >0 and highlight_chrpos==True:
223
+
224
+ # anno args for highlight group
225
+ if len(highlight_i) >0:
223
226
  if row["i"] in highlight_i:
224
227
  for key,value in highlight_anno_args.items():
225
228
  anno_default[key]=value
229
+
230
+ # anno args for specifc
231
+ #try:
232
+ if row[snpid] in anno_args_single.keys():
233
+ for key,value in anno_args_single[row[snpid]].items():
234
+ anno_default[key]=value
235
+ #except:
236
+ # pass
226
237
  ################################################################################################################################
227
238
  if anno_adjust==True:
228
239
  if _invert==False: