gwaslab 3.5.8__tar.gz → 3.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (124) hide show
  1. {gwaslab-3.5.8/src/gwaslab.egg-info → gwaslab-3.6.0}/PKG-INFO +42 -66
  2. {gwaslab-3.5.8 → gwaslab-3.6.0}/README.md +40 -64
  3. {gwaslab-3.5.8 → gwaslab-3.6.0}/pyproject.toml +2 -2
  4. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/data/reference.json +3 -1
  5. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_Sumstats.py +13 -2
  6. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_meta.py +2 -1
  7. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_version.py +2 -2
  8. gwaslab-3.6.0/src/gwaslab/util_ex_infer_ancestry.py +65 -0
  9. gwaslab-3.6.0/src/gwaslab/util_ex_run_magma.py +74 -0
  10. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_aux_annotate_plot.py +13 -2
  11. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_mqqplot.py +10 -3
  12. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_qqplot.py +3 -1
  13. {gwaslab-3.5.8 → gwaslab-3.6.0/src/gwaslab.egg-info}/PKG-INFO +42 -66
  14. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab.egg-info/SOURCES.txt +2 -0
  15. {gwaslab-3.5.8 → gwaslab-3.6.0}/LICENSE +0 -0
  16. {gwaslab-3.5.8 → gwaslab-3.6.0}/LICENSE_before_v3.4.39 +0 -0
  17. {gwaslab-3.5.8 → gwaslab-3.6.0}/setup.cfg +0 -0
  18. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/__init__.py +0 -0
  19. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/bd_common_data.py +0 -0
  20. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/bd_config.py +0 -0
  21. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/bd_download.py +0 -0
  22. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/bd_get_hapmap3.py +0 -0
  23. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/cache_manager.py +0 -0
  24. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/data/chrx_par/chrx_par_hg19.bed.gz +0 -0
  25. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/data/chrx_par/chrx_par_hg38.bed.gz +0 -0
  26. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/data/formatbook.json +0 -0
  27. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz +0 -0
  28. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz +0 -0
  29. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz +0 -0
  30. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz +0 -0
  31. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_Log.py +0 -0
  32. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_Phenotypes.py +0 -0
  33. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_SumstatsMulti.py +0 -0
  34. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_SumstatsPair.py +0 -0
  35. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_SumstatsSet.py +0 -0
  36. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_SumstatsT.py +0 -0
  37. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_Sumstats_polars.py +0 -0
  38. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_Sumstats_summary.py +0 -0
  39. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_headers.py +0 -0
  40. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_meta_update.py +0 -0
  41. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_vchange_status.py +0 -0
  42. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/g_vchange_status_polars.py +0 -0
  43. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/hm_casting.py +0 -0
  44. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/hm_casting_polars.py +0 -0
  45. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/hm_harmonize_sumstats.py +0 -0
  46. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/hm_rsid_to_chrpos.py +0 -0
  47. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/io_load_ld.py +0 -0
  48. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/io_preformat_input.py +0 -0
  49. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/io_preformat_input_polars.py +0 -0
  50. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/io_process_args.py +0 -0
  51. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/io_read_ldsc.py +0 -0
  52. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/io_read_pipcs.py +0 -0
  53. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/io_read_tabular.py +0 -0
  54. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/io_to_formats.py +0 -0
  55. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/io_to_pickle.py +0 -0
  56. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/ldsc_irwls.py +0 -0
  57. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/ldsc_jackknife.py +0 -0
  58. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/ldsc_ldscore.py +0 -0
  59. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/ldsc_parse.py +0 -0
  60. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/ldsc_regressions.py +0 -0
  61. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/ldsc_sumstats.py +0 -0
  62. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/prscs_gigrnd.py +0 -0
  63. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/prscs_mcmc_gtb.py +0 -0
  64. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/prscs_parse_genet.py +0 -0
  65. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/qc_build.py +0 -0
  66. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/qc_check_datatype.py +0 -0
  67. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/qc_check_datatype_polars.py +0 -0
  68. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/qc_fix_sumstats.py +0 -0
  69. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/qc_fix_sumstats_polars.py +0 -0
  70. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/run_script.py +0 -0
  71. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_abf_finemapping.py +0 -0
  72. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_calculate_ldmatrix.py +0 -0
  73. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_calculate_prs.py +0 -0
  74. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_gwascatalog.py +0 -0
  75. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_ldproxyfinder.py +0 -0
  76. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_ldsc.py +0 -0
  77. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_match_ldmatrix.py +0 -0
  78. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_plink_filter.py +0 -0
  79. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_process_h5.py +0 -0
  80. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_process_ref.py +0 -0
  81. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_2samplemr.py +0 -0
  82. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_ccgwas.py +0 -0
  83. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_clumping.py +0 -0
  84. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_coloc.py +0 -0
  85. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_hyprcoloc.py +0 -0
  86. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_mesusie.py +0 -0
  87. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_mtag.py +0 -0
  88. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_prscs.py +0 -0
  89. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_susie.py +0 -0
  90. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_calculate_gc.py +0 -0
  91. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_calculate_power.py +0 -0
  92. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_convert_h2.py +0 -0
  93. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_correct_winnerscurse.py +0 -0
  94. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_estimate_ess.py +0 -0
  95. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_fill_data.py +0 -0
  96. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_filter_value.py +0 -0
  97. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_get_density.py +0 -0
  98. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_get_sig.py +0 -0
  99. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_merge.py +0 -0
  100. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_meta.py +0 -0
  101. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_meta_polars.py +0 -0
  102. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/util_in_snphwe.py +0 -0
  103. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_aux_chromatin.py +0 -0
  104. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_aux_property.py +0 -0
  105. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_aux_quickfix.py +0 -0
  106. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_aux_reposition_text.py +0 -0
  107. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_aux_save_figure.py +0 -0
  108. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_compare_af.py +0 -0
  109. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_compare_effect.py +0 -0
  110. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_credible_sets.py +0 -0
  111. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_effect.py +0 -0
  112. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_forestplot.py +0 -0
  113. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_miamiplot.py +0 -0
  114. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_miamiplot2.py +0 -0
  115. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_phe_heatmap.py +0 -0
  116. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_regional2.py +0 -0
  117. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_regionalplot.py +0 -0
  118. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_rg_heatmap.py +0 -0
  119. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_scatter_with_reg.py +0 -0
  120. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_stackedregional.py +0 -0
  121. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab/viz_plot_trumpetplot.py +0 -0
  122. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab.egg-info/dependency_links.txt +0 -0
  123. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab.egg-info/requires.txt +0 -0
  124. {gwaslab-3.5.8 → gwaslab-3.6.0}/src/gwaslab.egg-info/top_level.txt +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gwaslab
3
- Version: 3.5.8
3
+ Version: 3.6.0
4
4
  Summary: A collection of handy tools for GWAS SumStats
5
5
  Author-email: Yunye <yunye@gwaslab.com>
6
6
  Project-URL: Homepage, https://cloufield.github.io/gwaslab/
7
7
  Project-URL: Github, https://github.com/Cloufield/gwaslab
8
8
  Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
10
10
  Classifier: Operating System :: OS Independent
11
11
  Requires-Python: <3.13,>=3.9
12
12
  Description-Content-Type: text/markdown
@@ -36,43 +36,63 @@ Dynamic: license-file
36
36
  ![badge_pip](https://img.shields.io/pypi/dm/gwaslab)
37
37
  ![badge_commit_m](https://img.shields.io/github/commit-activity/m/Cloufield/gwaslab)
38
38
 
39
- * A handy Python toolkit for handling GWAS summary statistics (sumstats).
39
+ * A handy Python-based toolkit for handling GWAS summary statistics (sumstats).
40
40
  * Each process is modularized and can be customized to your needs.
41
41
  * Sumstats-specific manipulations are designed as methods of a Python object, `gwaslab.Sumstats`.
42
42
 
43
- Please check GWASLab documentation at [https://cloufield.github.io/gwaslab/](https://cloufield.github.io/gwaslab/)
43
+ ## Installation
44
44
 
45
- Note: GWASLab is being updated very frequently for now. We will release the first stable version soon! Please stay tuned.
45
+ ### install via pip
46
46
 
47
- Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/gwaslab/KnownIssues/](https://cloufield.github.io/gwaslab/KnownIssues/) .
47
+ The latest version of GWASLab now supports Python 3.9, 3.10, 3.11, and 3.12.
48
48
 
49
- ## Install
49
+ ```bash
50
+ pip install gwaslab
51
+ ```
50
52
 
51
- ### install via pip
53
+ ### install in conda environment
52
54
 
53
- The latest version of GWASLab now supports Python 3.9, 3.10, 3.11, and 3.12.
55
+ Create a Python 3.9, 3.10, 3.11 or 3.12 environment and install gwaslab using pip:
56
+
57
+ ```bash
58
+ conda create -n gwaslab -c conda-forge python=3.12
59
+
60
+ conda activate gwaslab
54
61
 
62
+ pip install gwaslab
55
63
  ```
56
- pip install gwaslab==3.5.7
64
+
65
+ or create a new environment using yml file [environment.yml](https://github.com/Cloufield/gwaslab/blob/main/environment.yml)
66
+
67
+ ```bash
68
+ conda env create -n gwaslab -f environment.yml
57
69
  ```
58
70
 
71
+ ### install using docker (deprecated)
72
+
73
+ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main/docker/Dockerfile) for building local images.
74
+
75
+ ## Quick start
76
+
59
77
  ```python
78
+
60
79
  import gwaslab as gl
80
+
61
81
  # load plink2 output
62
- mysumstats = gl.Sumstats("t2d_bbj.txt.gz", fmt="plink2")
82
+ mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="plink2")
63
83
 
64
- # load sumstats with auto mode (auto-detecting common headers)
84
+ # or load sumstats with auto mode (auto-detecting commonly used headers)
65
85
  # assuming ALT/A1 is EA, and frq is EAF
66
- mysumstats = gl.Sumstats("t2d_bbj.txt.gz", fmt="auto")
86
+ mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="auto")
67
87
 
68
88
  # or you can specify the columns:
69
- mysumstats = gl.Sumstats("t2d_bbj.txt.gz",
89
+ mysumstats = gl.Sumstats("sumstats.txt.gz",
70
90
  snpid="SNP",
71
91
  chrom="CHR",
72
92
  pos="POS",
73
93
  ea="ALT",
74
94
  nea="REF",
75
- neaf="Frq",
95
+ eaf="Frq",
76
96
  beta="BETA",
77
97
  se="SE",
78
98
  p="P",
@@ -85,26 +105,9 @@ mysumstats.plot_mqq()
85
105
  ...
86
106
  ```
87
107
 
88
- ### install in conda environment
89
-
90
- Create a Python 3.9 environment and install gwaslab using pip:
91
-
92
- ```
93
- conda env create -n gwaslab_test -c conda-forge python=3.9
94
- conda activate gwaslab
95
- pip install gwaslab==3.4.45
96
- ```
97
-
98
- or create a new environment using yml file [environment_3.4.40.yml](https://github.com/Cloufield/gwaslab/blob/main/environment_3.4.40.yml)
99
-
100
- ```
101
- conda env create -n gwaslab -f environment_3.4.40.yml
102
- ```
103
-
104
-
105
- ### install using docker
108
+ ## Documentation and tutorials
106
109
 
107
- A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main/docker/Dockerfile) for building local images.
110
+ Documentation and tutorials for GWASLab are available [here](https://cloufield.github.io/gwaslab/).
108
111
 
109
112
  ## Functions
110
113
 
@@ -151,7 +154,7 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
151
154
  - Scatter plot: allele frequency comparison
152
155
  - Scatter plot: trumpet plot (plot of MAF and effect size with power lines)
153
156
 
154
- ### Visualization Examples
157
+ #### Visualization Examples
155
158
 
156
159
  <img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/233836639-34b03c47-5a59-4fd4-9677-5e13b02aab15.png">
157
160
  <img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/197393168-e3e7076f-2801-4d66-9526-80778d44f3da.png">
@@ -167,42 +170,15 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
167
170
  - Sumstats summary: give you a quick overview of the sumstats.
168
171
  - ...
169
172
 
170
- ## Requirements (deprecated)
171
-
172
- environment.yml
173
+ ## Issues
173
174
 
174
- ```
175
- name: gwaslab
176
- channels:
177
- - conda-forge
178
- - defaults
179
- dependencies:
180
- - python=3.8.16=h7a1cb2a_3
181
- - jupyter==1.0.0
182
- - pip==23.1.2
183
- - pip:
184
- - adjusttext==0.8
185
- - biopython==1.81
186
- - gwaslab==3.4.16
187
- - liftover==1.1.16
188
- - matplotlib==3.7.1
189
- - numpy==1.24.2
190
- - pandas==1.4.4
191
- - scikit-allel==1.3.5
192
- - scikit-learn==1.2.2
193
- - scipy==1.10.1
194
- - seaborn==0.11.2
195
- - statsmodels==0.13
196
- - adjustText==0.8
197
- - pysam==0.19
198
- - pyensembl==2.2.3
199
- - h5py==3.10.0
200
- ```
175
+ - GWASLab is currently under active development, with frequent updates.
176
+ - Note: Known issues are documented at https://cloufield.github.io/gwaslab/KnownIssues/.
201
177
 
202
178
  ## How to cite
203
179
  - GWASLab preprint: He, Y., Koido, M., Shimmori, Y., Kamatani, Y. (2023). GWASLab: a Python package for processing and visualizing GWAS summary statistics. Preprint at Jxiv, 2023-5. https://doi.org/10.51094/jxiv.370
204
180
 
205
- ## Sample Data
181
+ ## Sample data used for tutorial
206
182
  - Sample GWAS data used in GWASLab is obtained from: http://jenger.riken.jp/ (Suzuki, Ken, et al. "Identification of 28 new susceptibility loci for type 2 diabetes in the Japanese population." Nature genetics 51.3 (2019): 379-386.).
207
183
 
208
184
  ## Acknowledgement
@@ -7,43 +7,63 @@
7
7
  ![badge_pip](https://img.shields.io/pypi/dm/gwaslab)
8
8
  ![badge_commit_m](https://img.shields.io/github/commit-activity/m/Cloufield/gwaslab)
9
9
 
10
- * A handy Python toolkit for handling GWAS summary statistics (sumstats).
10
+ * A handy Python-based toolkit for handling GWAS summary statistics (sumstats).
11
11
  * Each process is modularized and can be customized to your needs.
12
12
  * Sumstats-specific manipulations are designed as methods of a Python object, `gwaslab.Sumstats`.
13
13
 
14
- Please check GWASLab documentation at [https://cloufield.github.io/gwaslab/](https://cloufield.github.io/gwaslab/)
14
+ ## Installation
15
15
 
16
- Note: GWASLab is being updated very frequently for now. We will release the first stable version soon! Please stay tuned.
16
+ ### install via pip
17
17
 
18
- Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/gwaslab/KnownIssues/](https://cloufield.github.io/gwaslab/KnownIssues/) .
18
+ The latest version of GWASLab now supports Python 3.9, 3.10, 3.11, and 3.12.
19
19
 
20
- ## Install
20
+ ```bash
21
+ pip install gwaslab
22
+ ```
21
23
 
22
- ### install via pip
24
+ ### install in conda environment
23
25
 
24
- The latest version of GWASLab now supports Python 3.9, 3.10, 3.11, and 3.12.
26
+ Create a Python 3.9, 3.10, 3.11 or 3.12 environment and install gwaslab using pip:
27
+
28
+ ```bash
29
+ conda create -n gwaslab -c conda-forge python=3.12
30
+
31
+ conda activate gwaslab
25
32
 
33
+ pip install gwaslab
26
34
  ```
27
- pip install gwaslab==3.5.7
35
+
36
+ or create a new environment using yml file [environment.yml](https://github.com/Cloufield/gwaslab/blob/main/environment.yml)
37
+
38
+ ```bash
39
+ conda env create -n gwaslab -f environment.yml
28
40
  ```
29
41
 
42
+ ### install using docker (deprecated)
43
+
44
+ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main/docker/Dockerfile) for building local images.
45
+
46
+ ## Quick start
47
+
30
48
  ```python
49
+
31
50
  import gwaslab as gl
51
+
32
52
  # load plink2 output
33
- mysumstats = gl.Sumstats("t2d_bbj.txt.gz", fmt="plink2")
53
+ mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="plink2")
34
54
 
35
- # load sumstats with auto mode (auto-detecting common headers)
55
+ # or load sumstats with auto mode (auto-detecting commonly used headers)
36
56
  # assuming ALT/A1 is EA, and frq is EAF
37
- mysumstats = gl.Sumstats("t2d_bbj.txt.gz", fmt="auto")
57
+ mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="auto")
38
58
 
39
59
  # or you can specify the columns:
40
- mysumstats = gl.Sumstats("t2d_bbj.txt.gz",
60
+ mysumstats = gl.Sumstats("sumstats.txt.gz",
41
61
  snpid="SNP",
42
62
  chrom="CHR",
43
63
  pos="POS",
44
64
  ea="ALT",
45
65
  nea="REF",
46
- neaf="Frq",
66
+ eaf="Frq",
47
67
  beta="BETA",
48
68
  se="SE",
49
69
  p="P",
@@ -56,26 +76,9 @@ mysumstats.plot_mqq()
56
76
  ...
57
77
  ```
58
78
 
59
- ### install in conda environment
60
-
61
- Create a Python 3.9 environment and install gwaslab using pip:
62
-
63
- ```
64
- conda env create -n gwaslab_test -c conda-forge python=3.9
65
- conda activate gwaslab
66
- pip install gwaslab==3.4.45
67
- ```
68
-
69
- or create a new environment using yml file [environment_3.4.40.yml](https://github.com/Cloufield/gwaslab/blob/main/environment_3.4.40.yml)
70
-
71
- ```
72
- conda env create -n gwaslab -f environment_3.4.40.yml
73
- ```
74
-
75
-
76
- ### install using docker
79
+ ## Documentation and tutorials
77
80
 
78
- A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main/docker/Dockerfile) for building local images.
81
+ Documentation and tutorials for GWASLab are available [here](https://cloufield.github.io/gwaslab/).
79
82
 
80
83
  ## Functions
81
84
 
@@ -122,7 +125,7 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
122
125
  - Scatter plot: allele frequency comparison
123
126
  - Scatter plot: trumpet plot (plot of MAF and effect size with power lines)
124
127
 
125
- ### Visualization Examples
128
+ #### Visualization Examples
126
129
 
127
130
  <img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/233836639-34b03c47-5a59-4fd4-9677-5e13b02aab15.png">
128
131
  <img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/197393168-e3e7076f-2801-4d66-9526-80778d44f3da.png">
@@ -138,42 +141,15 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
138
141
  - Sumstats summary: give you a quick overview of the sumstats.
139
142
  - ...
140
143
 
141
- ## Requirements (deprecated)
142
-
143
- environment.yml
144
+ ## Issues
144
145
 
145
- ```
146
- name: gwaslab
147
- channels:
148
- - conda-forge
149
- - defaults
150
- dependencies:
151
- - python=3.8.16=h7a1cb2a_3
152
- - jupyter==1.0.0
153
- - pip==23.1.2
154
- - pip:
155
- - adjusttext==0.8
156
- - biopython==1.81
157
- - gwaslab==3.4.16
158
- - liftover==1.1.16
159
- - matplotlib==3.7.1
160
- - numpy==1.24.2
161
- - pandas==1.4.4
162
- - scikit-allel==1.3.5
163
- - scikit-learn==1.2.2
164
- - scipy==1.10.1
165
- - seaborn==0.11.2
166
- - statsmodels==0.13
167
- - adjustText==0.8
168
- - pysam==0.19
169
- - pyensembl==2.2.3
170
- - h5py==3.10.0
171
- ```
146
+ - GWASLab is currently under active development, with frequent updates.
147
+ - Note: Known issues are documented at https://cloufield.github.io/gwaslab/KnownIssues/.
172
148
 
173
149
  ## How to cite
174
150
  - GWASLab preprint: He, Y., Koido, M., Shimmori, Y., Kamatani, Y. (2023). GWASLab: a Python package for processing and visualizing GWAS summary statistics. Preprint at Jxiv, 2023-5. https://doi.org/10.51094/jxiv.370
175
151
 
176
- ## Sample Data
152
+ ## Sample data used for tutorial
177
153
  - Sample GWAS data used in GWASLab is obtained from: http://jenger.riken.jp/ (Suzuki, Ken, et al. "Identification of 28 new susceptibility loci for type 2 diabetes in the Japanese population." Nature genetics 51.3 (2019): 379-386.).
178
154
 
179
155
  ## Acknowledgement
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
7
7
 
8
8
  [project]
9
9
  name = "gwaslab"
10
- version = "3.5.8"
10
+ version = "3.6.0"
11
11
  authors = [
12
12
  { name="Yunye", email="yunye@gwaslab.com" },
13
13
  ]
@@ -34,7 +34,7 @@ dependencies = [
34
34
  requires-python = ">=3.9,<3.13"
35
35
  classifiers = [
36
36
  "Programming Language :: Python :: 3",
37
- "License :: OSI Approved :: MIT License",
37
+ "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
38
38
  "Operating System :: OS Independent",
39
39
  ]
40
40
 
@@ -103,7 +103,9 @@
103
103
  "13to19":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/chm13v2-hg19.chain",
104
104
  "13to38":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/chm13v2-grch38.chain",
105
105
  "18to19":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz",
106
- "18to38":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg38.over.chain.gz"
106
+ "18to38":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg38.over.chain.gz",
107
+ "1kg_hm3_hg38_eaf":"https://www.dropbox.com/scl/fi/ymkqfsaec6mwjzlvxsm45/PAN.hapmap3.hg38.EAF.tsv.gz?rlkey=p1auef5y1kk7ui41k6j3s8b0z&dl=1",
108
+ "1kg_hm3_hg19_eaf":"https://www.dropbox.com/scl/fi/dmv9wtfchv6ahim86d49r/PAN.hapmap3.hg19.EAF.tsv.gz?rlkey=ywne2gj1rlm2nj42q9lt2d99n&dl=1"
107
109
  }
108
110
 
109
111
 
@@ -84,6 +84,8 @@ from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
84
84
  from gwaslab.util_ex_ldsc import _estimate_h2_cts_by_ldsc
85
85
  from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
86
86
  from gwaslab.util_ex_ldproxyfinder import _extract_ld_proxy
87
+ from gwaslab.util_ex_run_magma import _run_magma
88
+ from gwaslab.util_ex_infer_ancestry import _infer_ancestry
87
89
  from gwaslab.bd_get_hapmap3 import gethapmap3
88
90
  from gwaslab.util_abf_finemapping import abf_finemapping
89
91
  from gwaslab.util_abf_finemapping import make_cs
@@ -674,6 +676,9 @@ class Sumstats():
674
676
  fig,outliers = plotdaf(self.data, **kwargs)
675
677
  return fig, outliers
676
678
 
679
def infer_ancestry(self, **kwargs):
    """Infer the closest reference ancestry and record it in the metadata.

    Keyword arguments are forwarded to ``_infer_ancestry``. Unless the
    caller supplies its own ``log``, this object's log is used so the
    messages are recorded with the rest of its history.

    Returns
    -------
    The value returned by ``_infer_ancestry``, which is also stored in
    ``self.meta["gwaslab"]["inferred_ancestry"]``.
    """
    # setdefault (rather than an explicit log= keyword) keeps a
    # caller-provided ``log`` working instead of raising a duplicate
    # keyword TypeError.
    kwargs.setdefault("log", self.log)
    inferred = _infer_ancestry(self.data, **kwargs)
    self.meta["gwaslab"]["inferred_ancestry"] = inferred
    return inferred
681
+
677
682
  def plot_gwheatmap(self, **kwargs):
678
683
  fig = _gwheatmap(self.data, **kwargs)
679
684
  return fig
@@ -882,8 +887,14 @@ class Sumstats():
882
887
  if build is None:
883
888
  build = self.meta["gwaslab"]["genome_build"]
884
889
  insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
885
- _run_prscs(sst_file = insumstats[["rsID","CHR","POS","EA","NEA","BETA","SE"]], log=self.log, **kwargs)
886
-
890
+ _run_prscs(sst_file = insumstats[["rsID","CHR","POS","EA","NEA","BETA","SE"]],
891
+ log=self.log,
892
+ **kwargs)
893
+
894
def run_magma(self, build=None, verbose=True, **kwargs):
    """Run MAGMA on this object's summary statistics.

    Parameters
    ----------
    build : str, optional
        Genome build passed to ``_run_magma``. When ``None``, the build
        recorded in ``self.meta["gwaslab"]["genome_build"]`` is used.
    verbose : bool
        Forwarded to ``_run_magma``.
    **kwargs
        Forwarded to ``_run_magma``.
    """
    # Fall back to the recorded build, as the sibling wrappers do, so
    # ``None`` is never handed to the helper.
    if build is None:
        build = self.meta["gwaslab"]["genome_build"]
    _run_magma(self.data,
               study=self.meta["gwaslab"]["study_name"],
               build=build, verbose=verbose, log=self.log, **kwargs)
887
898
  ## LDSC ##############################################################################################
888
899
  def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right", **kwargs):
889
900
  if build is None:
@@ -6,7 +6,7 @@ def _init_meta(object="Sumstats"):
6
6
  "gwas_id":"Unknown",
7
7
  "samples":{
8
8
  "sample_size":"Unknown",
9
- "sample_ancestry":"European",
9
+ "sample_ancestry":"Unknown",
10
10
  "ancestry_method":"self-reported|genetically determined",
11
11
  } ,
12
12
  "trait_description":"Unknown",
@@ -37,6 +37,7 @@ def _init_meta(object="Sumstats"):
37
37
  "species":"homo sapiens",
38
38
  "genome_build":"99",
39
39
  "sample_prevalence":"Unknown",
40
+ "inferred_ancestry":"Unknown",
40
41
  "population_prevalence":"Unknown",
41
42
  "variants":{
42
43
  "variant_number":"Unknown",
@@ -15,8 +15,8 @@ def _get_version():
15
15
  def gwaslab_info():
16
16
  # version meta information
17
17
  dic={
18
- "version":"3.5.8",
19
- "release_date":"20250424"
18
+ "version":"3.6.0",
19
+ "release_date":"20250504"
20
20
  }
21
21
  return dic
22
22
 
@@ -0,0 +1,65 @@
1
+
2
+ import pandas as pd
3
+ from gwaslab.g_Log import Log
4
+
5
def _infer_ancestry(sumstats,
                    ancestry_af=None,
                    build="19",
                    log=Log(),
                    verbose=True):
    """Pick the reference population with the lowest mean Fst to the sumstats.

    Parameters
    ----------
    sumstats : pandas.DataFrame
        Must contain the columns CHR, POS, EA, NEA and EAF.
    ancestry_af : str
        Path (or buffer) of a tab-separated reference table. It is merged on
        CHR and POS and must provide an ``ALT`` column plus one allele
        frequency column per population code listed below.
    build : str
        Currently unused by this function.
    log : Log
        Logger receiving progress messages.
    verbose : bool
        Forwarded to ``log.write``.

    Returns
    -------
    str
        Population code with the smallest mean Fst, or ``"Unknown"`` when no
        Fst could be estimated (e.g. no overlapping variants).

    Raises
    ------
    ValueError
        If ``ancestry_af`` is not provided.
    """
    if ancestry_af is None:
        raise ValueError("ancestry_af is required: path to a tab-separated "
                         "reference allele frequency table.")

    log.write("Start to infer ancestry based on Fst...", verbose=verbose)
    ref_af = pd.read_csv(ancestry_af, sep="\t")

    data_af = pd.merge(sumstats[["CHR","POS","EA","NEA","EAF"]], ref_af,
                       on=["CHR","POS"], how="inner")

    log.write(f" -Estimating Fst using {len(data_af)} variants...", verbose=verbose)

    # NOTE(review): EAF is flipped where EA equals the reference ALT; this
    # presumes the reference frequencies refer to the other allele. Confirm
    # against the reference file.
    is_flip = data_af["EA"] == data_af["ALT"]

    # Swap EA/NEA on flipped rows. Assigning a column-reordered frame through
    # .loc is a no-op because pandas aligns on column labels, so the swap is
    # done per column. Casting to object avoids failures when the allele
    # columns are categoricals with differing category sets.
    ea = data_af["EA"].astype(object)
    nea = data_af["NEA"].astype(object)
    data_af["EA"] = ea.where(~is_flip, nea)
    data_af["NEA"] = nea.where(~is_flip, ea)
    data_af.loc[is_flip, "EAF"] = 1 - data_af.loc[is_flip, "EAF"]

    populations = ['GBR', 'FIN', 'CHS', 'PUR', 'CDX',
                   'CLM', 'IBS', 'PEL', 'PJL', 'KHV', 'ACB', 'GWD', 'ESN', 'BEB', 'MSL',
                   'STU', 'ITU', 'CEU', 'YRI', 'CHB', 'JPT', 'LWK', 'ASW', 'MXL', 'TSI',
                   'GIH', 'EUR', 'EAS', 'AMR', 'SAS', 'AFR']

    # calculate_fst is plain arithmetic, so it is applied to whole columns
    # at once; undefined (0/0) entries become NaN and are skipped by mean().
    fst_means = pd.Series(
        {f"FST_{pop}": calculate_fst(data_af["EAF"], data_af[pop]).mean()
         for pop in populations}
    ).sort_values()

    for name, value in fst_means.items():
        log.write( f" -{name} : {value}", verbose=verbose)

    if fst_means.isna().all():
        # Nothing to rank, e.g. no variant overlapped the reference table.
        log.write(" -No Fst could be estimated; ancestry is left as Unknown.", verbose=verbose)
        log.write("Finished inferring ancestry.", verbose=verbose)
        return "Unknown"

    closest_ancestry = fst_means.idxmin().split("_")[1]

    log.write(f" -Closest Ancestry: {closest_ancestry}", verbose=verbose)
    log.write("Finished inferring ancestry.", verbose=verbose)
    return closest_ancestry
37
+
38
def calculate_fst(p_1, p_2):
    """Estimate Fst between two populations from one allele's frequencies.

    Follows https://bios1140.github.io/understanding-fst-the-fixation-index.html:
    ``Fst = (Ht - Hs) / Ht`` where ``Hs`` is the mean expected heterozygosity
    of the two populations and ``Ht`` is the expected heterozygosity of the
    pooled population (mean allele frequency).

    Parameters
    ----------
    p_1, p_2 : float or array-like supporting arithmetic
        Frequency of the same allele in population 1 and population 2.

    Returns
    -------
    Same kind as the inputs. NaN where ``Ht`` is zero (the allele is absent
    or fixed in both populations), since Fst is undefined there.
    """
    # expected heterozygosity within each population
    hs_1 = 2 * p_1 * (1 - p_1)
    hs_2 = 2 * p_2 * (1 - p_2)
    hs = (hs_1 + hs_2) / 2

    # expected heterozygosity of the pooled population
    p_t = (p_1 + p_2) / 2
    ht = 2 * p_t * (1 - p_t)

    try:
        return (ht - hs) / ht
    except ZeroDivisionError:
        # Plain Python floats raise on 0/0 whereas NumPy/pandas yield NaN;
        # return NaN so both input kinds behave the same.
        return float("nan")
65
+
@@ -0,0 +1,74 @@
1
+ import subprocess
2
+ import os
3
+ import gc
4
+ import pandas as pd
5
+ import numpy as np
6
+ from gwaslab.g_Log import Log
7
+ from gwaslab.util_in_filter_value import _exclude_hla
8
+
9
def _run_magma(sumstats,
               magma="magma",
               study="Study1",
               exclude_hla=True,
               window="35,10",
               id_to_use="rsID",
               ref=None,
               ncbi=None,
               set_annot=None,
               out="./",
               delete=True,
               ncol="N",
               build="19",
               log=Log(),
               verbose=True):
    """Write MAGMA input files from sumstats and run MAGMA through the shell.

    Two temporary tab-separated files are written with the prefix
    ``{out}{study}``: SNP/CHR/POS for ``--snp-loc`` and SNP/P/N for
    ``--pval``. MAGMA is then run for annotation and gene analysis, plus a
    gene-set analysis when ``set_annot`` is given. MAGMA itself is called with
    ``--out {study}``.

    Parameters
    ----------
    sumstats : pandas.DataFrame
        Must contain ``id_to_use``, CHR, POS, P and N. Rows with any missing
        value are dropped before writing.
    magma : str
        MAGMA executable as typed on the command line.
    study : str
        Prefix for temporary and MAGMA output files.
    exclude_hla : bool
        When true, ``_exclude_hla`` is applied first using ``build``.
    window : str
        Value for ``--annotate window=``.
    id_to_use : str
        Column written as ``SNP``.
    ref : str
        Value for ``--bfile``.
    ncbi : str
        Value for ``--gene-loc``.
    set_annot : str, optional
        Value for ``--set-annot``; enables the gene-set step.
    out : str
        Prefix (directory) for the temporary files.
    delete : bool
        Remove the temporary files after the run, whether or not it succeeded.
    ncol : str
        Value for the ``ncol=`` modifier of ``--pval``.
    build : str
        Genome build handed to ``_exclude_hla``.
    log : Log
        Logger receiving progress and MAGMA output.
    verbose : bool
        Forwarded to ``log.write``.
    """
    log.write(" Start to run magma from command line:", verbose=verbose)

    if exclude_hla:
        sumstats = _exclude_hla(sumstats, build =build)

    snploc = "{}{}.rsid.chr.pos.tsv".format(out, study)
    pval = "{}{}.rsid.p.n.tsv".format(out, study)

    # Drop incomplete rows once and reuse the result for both files.
    complete = sumstats.dropna()

    log.write(f" -writing temp file for --snp-loc:{snploc}", verbose=verbose)
    complete[[id_to_use,"CHR","POS"]].rename(columns={id_to_use:"SNP"}).to_csv(snploc, index=None, sep="\t")

    log.write(f" -writing temp file for --pval:{pval}", verbose=verbose)
    complete[[id_to_use,"P","N"]].rename(columns={id_to_use:"SNP"}).to_csv(pval, index=None, sep="\t")

    log.write(f" --annotate window: {window}", verbose=verbose)
    log.write(f" --gene-loc: {ncbi}", verbose=verbose)
    log.write(f" --bfile: {ref}", verbose=verbose)
    log.write(f" Output prefix: {out}", verbose=verbose)

    bash_script = f'''

#!/bin/bash

{magma} --annotate window={window} --snp-loc {snploc} --gene-loc {ncbi} --out {study}

{magma} --bfile {ref} --pval {pval} ncol={ncol} --gene-annot {study}.genes.annot --out {study}

'''

    if set_annot is not None:
        bash_script += f'''
{magma} --gene-results {study}.genes.raw --set-annot {set_annot} --out {study}
'''
    log.write(f"Script: {bash_script}", verbose=verbose)

    try:
        log.write(" Running magma from command line...", verbose=verbose)
        output = subprocess.check_output(bash_script, stderr=subprocess.STDOUT, shell=True, text=True)
        # Keep MAGMA's own report in the log rather than discarding it.
        log.write(output + "\n", verbose=verbose)
    except subprocess.CalledProcessError as e:
        log.warning("ERROR!")
        log.write(e.output)
    finally:
        # Clean up on failure too, so temp files are never left behind when
        # deletion was requested.
        if delete:
            for tmp_path in (snploc, pval):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

    log.write("Finished running magma.", verbose=verbose)
74
+
@@ -26,6 +26,7 @@ def annotate_single(
26
26
  anno_alias,
27
27
  anno_style,
28
28
  anno_args,
29
+ anno_args_single,
29
30
  arm_scale,
30
31
  anno_max_iter,
31
32
  arm_scale_d,
@@ -216,13 +217,23 @@ def annotate_single(
216
217
  if anno_style == "tight" :
217
218
  anno_default["rotation"] = 90
218
219
  ################################################################################################################################
219
-
220
+ # anno args for all
220
221
  for key,value in anno_args.items():
221
222
  anno_default[key]=value
222
- if len(highlight_i) >0 and highlight_chrpos==True:
223
+
224
+ # anno args for highlight group
225
+ if len(highlight_i) >0:
223
226
  if row["i"] in highlight_i:
224
227
  for key,value in highlight_anno_args.items():
225
228
  anno_default[key]=value
229
+
230
+ # anno args for specific variants (keyed by SNPID)
231
+ #try:
232
+ if row[snpid] in anno_args_single.keys():
233
+ for key,value in anno_args_single[row[snpid]].items():
234
+ anno_default[key]=value
235
+ #except:
236
+ # pass
226
237
  ################################################################################################################################
227
238
  if anno_adjust==True:
228
239
  if _invert==False:
@@ -154,6 +154,7 @@ def mqqplot(insumstats,
154
154
  anno_alias=None,
155
155
  anno_d=None,
156
156
  anno_args=None,
157
+ anno_args_single=None,
157
158
  anno_style="right",
158
159
  anno_fixed_arm_length=None,
159
160
  anno_source = "ensembl",
@@ -209,7 +210,9 @@ def mqqplot(insumstats,
209
210
  drop_chr_start=False,
210
211
  title =None,
211
212
  mtitle=None,
213
+ mtitle_pad=1.08,
212
214
  qtitle=None,
215
+ qtitle_pad=1.08,
213
216
  ylabel=None,
214
217
  xlabel=None,
215
218
  title_pad=1.08,
@@ -256,6 +259,7 @@ def mqqplot(insumstats,
256
259
  anno_alias = _update_args(anno_alias, dict())
257
260
  anno_d = _update_args(anno_d,dict())
258
261
  anno_args = _update_args(anno_args,dict())
262
+ anno_args_single = _update_args(anno_args_single,dict())
259
263
  arrow_kwargs = _update_args(arrow_kwargs,dict())
260
264
 
261
265
  colors = _update_arg(colors, ["#597FBD","#74BAD3"])
@@ -1002,7 +1006,7 @@ def mqqplot(insumstats,
1002
1006
 
1003
1007
 
1004
1008
  if mtitle and anno and len(to_annotate)>0:
1005
- pad=(ax1.transData.transform((skip, title_pad*maxy))[1]-ax1.transData.transform((skip, maxy)))[1]
1009
+ pad=(ax1.transData.transform((skip, mtitle_pad*maxy))[1]-ax1.transData.transform((skip, maxy)))[1]
1006
1010
  ax1.set_title(mtitle,pad=pad,fontsize=title_fontsize,family=font_family)
1007
1011
  elif mtitle:
1008
1012
  ax1.set_title(mtitle,fontsize=title_fontsize,family=font_family)
@@ -1023,6 +1027,7 @@ def mqqplot(insumstats,
1023
1027
  anno_alias=anno_alias,
1024
1028
  anno_style=anno_style,
1025
1029
  anno_args=anno_args,
1030
+ anno_args_single=anno_args_single,
1026
1031
  arm_scale=arm_scale,
1027
1032
  anno_max_iter=anno_max_iter,
1028
1033
  arm_scale_d=arm_scale_d,
@@ -1074,6 +1079,7 @@ def mqqplot(insumstats,
1074
1079
  fontsize=fontsize,
1075
1080
  font_family=font_family,
1076
1081
  qtitle=qtitle,
1082
+ qtitle_pad=qtitle_pad,
1077
1083
  title_fontsize=title_fontsize,
1078
1084
  include_chrXYMT=include_chrXYMT,
1079
1085
  cut_line_color=cut_line_color,
@@ -1106,9 +1112,10 @@ def mqqplot(insumstats,
1106
1112
  # Titles
1107
1113
  if title and anno and len(to_annotate)>0:
1108
1114
  # increase height if annotation
1109
- fig.suptitle(title , fontsize = title_fontsize ,x=0.5, y=1.05)
1115
+ fig.suptitle(title , fontsize = title_fontsize ,x=0.5, y=title_pad)
1110
1116
  else:
1111
- fig.suptitle(title , fontsize = title_fontsize, x=0.5,y=1)
1117
+ title_pad = title_pad -0.05
1118
+ fig.suptitle(title , fontsize = title_fontsize, x=0.5,y=title_pad)
1112
1119
  ## Add annotation arrows and texts
1113
1120
 
1114
1121
  # Saving figure