genal-python 0.0.dev0__tar.gz → 0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {genal_python-0.0.dev0 → genal_python-0.1}/PKG-INFO +24 -11
  2. {genal_python-0.0.dev0 → genal_python-0.1}/README.md +23 -10
  3. genal_python-0.1/docs/Images/MR_plot_SBP_AS.png +0 -0
  4. {genal_python-0.0.dev0 → genal_python-0.1}/genal/Geno.py +15 -15
  5. {genal_python-0.0.dev0 → genal_python-0.1}/genal/__init__.py +2 -0
  6. {genal_python-0.0.dev0 → genal_python-0.1}/genal/geno_tools.py +8 -8
  7. {genal_python-0.0.dev0 → genal_python-0.1}/pyproject.toml +1 -1
  8. {genal_python-0.0.dev0 → genal_python-0.1}/.gitignore +0 -0
  9. {genal_python-0.0.dev0 → genal_python-0.1}/LICENSE +0 -0
  10. {genal_python-0.0.dev0 → genal_python-0.1}/docs/Makefile +0 -0
  11. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/doctrees/environment.pickle +0 -0
  12. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/doctrees/index.doctree +0 -0
  13. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/doctrees/source/genal.doctree +0 -0
  14. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/doctrees/source/modules.doctree +0 -0
  15. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/.buildinfo +0 -0
  16. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_sources/index.rst.txt +0 -0
  17. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_sources/source/genal.rst.txt +0 -0
  18. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_sources/source/modules.rst.txt +0 -0
  19. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/_sphinx_javascript_frameworks_compat.js +0 -0
  20. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/basic.css +0 -0
  21. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/badge_only.css +0 -0
  22. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff +0 -0
  23. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2 +0 -0
  24. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff +0 -0
  25. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff2 +0 -0
  26. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/fontawesome-webfont.eot +0 -0
  27. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/fontawesome-webfont.svg +0 -0
  28. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/fontawesome-webfont.ttf +0 -0
  29. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff +0 -0
  30. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff2 +0 -0
  31. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-bold-italic.woff +0 -0
  32. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-bold-italic.woff2 +0 -0
  33. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-bold.woff +0 -0
  34. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-bold.woff2 +0 -0
  35. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-normal-italic.woff +0 -0
  36. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-normal-italic.woff2 +0 -0
  37. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-normal.woff +0 -0
  38. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-normal.woff2 +0 -0
  39. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/theme.css +0 -0
  40. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/doctools.js +0 -0
  41. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/documentation_options.js +0 -0
  42. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/file.png +0 -0
  43. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/jquery.js +0 -0
  44. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/js/badge_only.js +0 -0
  45. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/js/html5shiv-printshiv.min.js +0 -0
  46. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/js/html5shiv.min.js +0 -0
  47. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/js/theme.js +0 -0
  48. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/language_data.js +0 -0
  49. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/minus.png +0 -0
  50. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/plus.png +0 -0
  51. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/pygments.css +0 -0
  52. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/searchtools.js +0 -0
  53. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/sphinx_highlight.js +0 -0
  54. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/genindex.html +0 -0
  55. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/index.html +0 -0
  56. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/objects.inv +0 -0
  57. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/py-modindex.html +0 -0
  58. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/search.html +0 -0
  59. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/searchindex.js +0 -0
  60. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/source/genal.html +0 -0
  61. {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/source/modules.html +0 -0
  62. {genal_python-0.0.dev0 → genal_python-0.1}/docs/make.bat +0 -0
  63. {genal_python-0.0.dev0 → genal_python-0.1}/docs/requirements.txt +0 -0
  64. {genal_python-0.0.dev0 → genal_python-0.1}/docs/source/api.rst +0 -0
  65. {genal_python-0.0.dev0 → genal_python-0.1}/docs/source/conf.py +0 -0
  66. {genal_python-0.0.dev0 → genal_python-0.1}/docs/source/genal.rst +0 -0
  67. {genal_python-0.0.dev0 → genal_python-0.1}/docs/source/index.rst +0 -0
  68. {genal_python-0.0.dev0 → genal_python-0.1}/docs/source/introduction.rst +0 -0
  69. {genal_python-0.0.dev0 → genal_python-0.1}/docs/source/modules.rst +0 -0
  70. {genal_python-0.0.dev0 → genal_python-0.1}/genal/MR.py +0 -0
  71. {genal_python-0.0.dev0 → genal_python-0.1}/genal/MR_tools.py +0 -0
  72. {genal_python-0.0.dev0 → genal_python-0.1}/genal/MRpresso.py +0 -0
  73. {genal_python-0.0.dev0 → genal_python-0.1}/genal/association.py +0 -0
  74. {genal_python-0.0.dev0 → genal_python-0.1}/genal/clump.py +0 -0
  75. {genal_python-0.0.dev0 → genal_python-0.1}/genal/constants.py +0 -0
  76. {genal_python-0.0.dev0 → genal_python-0.1}/genal/extract_prs.py +0 -0
  77. {genal_python-0.0.dev0 → genal_python-0.1}/genal/lift.py +0 -0
  78. {genal_python-0.0.dev0 → genal_python-0.1}/genal/proxy.py +0 -0
  79. {genal_python-0.0.dev0 → genal_python-0.1}/genal/tools.py +0 -0
  80. {genal_python-0.0.dev0 → genal_python-0.1}/gitignore +0 -0
  81. {genal_python-0.0.dev0 → genal_python-0.1}/readthedocs.yaml +0 -0
  82. {genal_python-0.0.dev0 → genal_python-0.1}/requirements.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: genal-python
3
- Version: 0.0.dev0
3
+ Version: 0.1
4
4
  Summary: A python toolkit for polygenic risk scoring and mendelian randomization.
5
5
  Author-email: Cyprien Rivier <riviercyprien@gmail.com>
6
6
  Requires-Python: >=3.7
@@ -51,7 +51,7 @@ The module prioritizes user-friendliness and intuitive operation, aiming to redu
51
51
  Genal draws on concepts from well-established R packages such as TwoSampleMR, MR-Presso, MendelianRandomization, and gwasvcf, adapting their proven methodologies to the Python environment. This approach ensures that users have access to tried and tested techniques with the versatility of Python's data science tools.
52
52
 
53
53
  ## Requirements for the GENAL module <a name="paragraph1"></a>
54
- ***Python 3.7 or later***. https://www.python.org/ or https://www.python.org/downloads/release/python-379/ <br>
54
+ ***Python 3.9 or later***. https://www.python.org/ <br>
55
55
 
56
56
 
57
57
  ## Installation and How to use the GENAL module <a name="paragraph2"></a>
@@ -60,7 +60,11 @@ Genal draws on concepts from well-established R packages such as TwoSampleMR, MR
60
60
 
61
61
  Download and install the package with pip:
62
62
  ```
63
- pip install genal
63
+ pip install genal-python
64
+ ```
65
+ And it can be imported in a python environment with:
66
+ ```python
67
+ import genal
64
68
  ```
65
69
 
66
70
  The main genal functionalities require a working installation of PLINK v1.9 that can be downloaded here: https://www.cog-genomics.org/plink/
@@ -81,6 +85,7 @@ For this tutorial, we will build a Polygenic Risk Score (PRS) for systolic blood
81
85
  - Include risk score calculations with proxies
82
86
  - Perform Mendelian Randomization
83
87
  - Analyze SBP as an exposure and acute ischemic stroke as an outcome
88
+ - Plot the results
84
89
  - Conduct sensitivity analyses using the weighted median, MR-Egger, and MR-PRESSO methods
85
90
  - Calibrate SNP-trait weights with individual-level genetic data
86
91
  - Execute single-SNP association tests for calibrating SBP genetic instruments
@@ -155,15 +160,15 @@ Now that we have loaded the data into a `genal.Geno` object, we can begin cleani
155
160
  Genal can run all the basic cleaning and preprocessing steps in one command:
156
161
 
157
162
  ```python
158
- SBP_Geno.preprocess_data(preprocessing = 2)
163
+ SBP_Geno.preprocess_data(preprocessing = 'Fill_delete')
159
164
  ```
160
165
 
161
166
  The `preprocessing` argument specifies the global level of preprocessing applied to the data:
162
- - `preprocessing = 0`: The data won't be modified.
163
- - `preprocessing = 1`: Missing columns will be added based on reference data and invalid values set to NaN, but no rows will be deleted.
164
- - `preprocessing = 2`: Missing columns will be added, and all rows containing missing, duplicated, or invalid values will be deleted. This option is recommended before running genetic methods.
167
+ - `preprocessing = 'None'`: The data won't be modified.
168
+ - `preprocessing = 'Fill'`: Missing columns will be added based on reference data and invalid values set to NaN, but no rows will be deleted.
169
+ - `preprocessing = 'Fill_delete'`: Missing columns will be added, and all rows containing missing, duplicated, or invalid values will be deleted. This option is recommended before running genetic methods.
165
170
 
166
- By default, and depending on the global preprocessing level (0, 1, 2) chosen, the `preprocess_data` method of `genal.Geno` will run the following checks:
171
+ By default, and depending on the global preprocessing level ('None', 'Fill', 'Fill_delete') chosen, the `preprocess_data` method of `genal.Geno` will run the following checks:
167
172
  - Ensure the CHR (chromosome) and POS (genomic position) columns are integers.
168
173
  - Ensure the EA (effect allele) and NEA (non-effect allele) columns are uppercase characters containing A, T, C, G letters. Multiallelic values are set to NaN.
169
174
  - Validate the P (p-value) column for proper values.
@@ -202,7 +207,7 @@ And we see that the SNP column with the rsids has been added based on the refere
202
207
  You do not need to obtain the 1000 genome reference panel yourself, Genal will download it the first time you use it. By default, the reference panel used is the european (eur) one. You can specify another valid reference panel (afr, eas, sas, amr) with the reference_panel argument:
203
208
 
204
209
  ```python
205
- SBP_Geno.preprocess_data(preprocessing = 2, reference_panel = "afr")
210
+ SBP_Geno.preprocess_data(preprocessing = 'Fill_delete', reference_panel = "afr")
206
211
  ```
207
212
 
208
213
  You can also use a custom reference panel by specifying to the reference_panel argument a path to bed/bim/fam files (without the extension).
@@ -366,7 +371,7 @@ Stroke_Geno = genal.Geno(stroke_gwas, CHR = "chromosome", POS = "base_pair_locat
366
371
  We preprocess it as well to put it in the correct format and make sure there are no invalid values:
367
372
 
368
373
  ```python
369
- Stroke_Geno.preprocess_data(preprocessing = 2)
374
+ Stroke_Geno.preprocess_data(preprocessing = 'Fill_delete')
370
375
  ```
371
376
 
372
377
  Now, we need to extract our instruments (SNPs of the SBP_clumped data) from the outcome data to obtain their association with the outcome trait (stroke). It can be done by calling the `genal.Geno.query_outcome` method:
@@ -431,8 +436,16 @@ By default, all MR methods (inverse-variance weighted, weighted median, MR-Egger
431
436
 
432
437
  For more fine-tuning, such as settings for the number of bootstrapping iterations, please refer to the API.
433
438
 
434
- If you wish to include the heterogeneity values (Cochran's Q), you can use the heterogeneity argument. Here, the heterogeneity for the inverse-variance weighted method:
439
+ If you want to visualize the obtained MR results, you can use the `genal.Geno.MR_plot` method that will plot each SNP in an effect_on_exposure x effect_on_outcome plane as well as lines corresponding to different MR methods:
440
+
441
+ ```python
442
+ SBP_clumped.MR_plot(filename="MR_plot_SBP_AS")
443
+ ```
444
+
445
+ ![MR plot](docs/Images/MR_plot_SBP_AS.png)
446
+ You can select which MR methods you wish to plot with the 'methods' argument. Note that for an MR method to be plotted, it must be included in the latest `genal.Geno.MR` call of this `genal.Geno` object.
435
447
 
448
+ If you wish to include the heterogeneity values (Cochran's Q) in the results, you can use the heterogeneity argument in the `genal.Geno.MR` call. Here, the heterogeneity for the inverse-variance weighted method:
436
449
 
437
450
  ```python
438
451
  SBP_clumped.MR(action = 3, methods = ["IVW"], exposure_name = "SBP", outcome_name = "Stroke_eur", heterogeneity = True)
@@ -29,7 +29,7 @@ The module prioritizes user-friendliness and intuitive operation, aiming to redu
29
29
  Genal draws on concepts from well-established R packages such as TwoSampleMR, MR-Presso, MendelianRandomization, and gwasvcf, adapting their proven methodologies to the Python environment. This approach ensures that users have access to tried and tested techniques with the versatility of Python's data science tools.
30
30
 
31
31
  ## Requirements for the GENAL module <a name="paragraph1"></a>
32
- ***Python 3.7 or later***. https://www.python.org/ or https://www.python.org/downloads/release/python-379/ <br>
32
+ ***Python 3.9 or later***. https://www.python.org/ <br>
33
33
 
34
34
 
35
35
  ## Installation and How to use the GENAL module <a name="paragraph2"></a>
@@ -38,7 +38,11 @@ Genal draws on concepts from well-established R packages such as TwoSampleMR, MR
38
38
 
39
39
  Download and install the package with pip:
40
40
  ```
41
- pip install genal
41
+ pip install genal-python
42
+ ```
43
+ And it can be imported in a python environment with:
44
+ ```python
45
+ import genal
42
46
  ```
43
47
 
44
48
  The main genal functionalities require a working installation of PLINK v1.9 that can be downloaded here: https://www.cog-genomics.org/plink/
@@ -59,6 +63,7 @@ For this tutorial, we will build a Polygenic Risk Score (PRS) for systolic blood
59
63
  - Include risk score calculations with proxies
60
64
  - Perform Mendelian Randomization
61
65
  - Analyze SBP as an exposure and acute ischemic stroke as an outcome
66
+ - Plot the results
62
67
  - Conduct sensitivity analyses using the weighted median, MR-Egger, and MR-PRESSO methods
63
68
  - Calibrate SNP-trait weights with individual-level genetic data
64
69
  - Execute single-SNP association tests for calibrating SBP genetic instruments
@@ -133,15 +138,15 @@ Now that we have loaded the data into a `genal.Geno` object, we can begin cleani
133
138
  Genal can run all the basic cleaning and preprocessing steps in one command:
134
139
 
135
140
  ```python
136
- SBP_Geno.preprocess_data(preprocessing = 2)
141
+ SBP_Geno.preprocess_data(preprocessing = 'Fill_delete')
137
142
  ```
138
143
 
139
144
  The `preprocessing` argument specifies the global level of preprocessing applied to the data:
140
- - `preprocessing = 0`: The data won't be modified.
141
- - `preprocessing = 1`: Missing columns will be added based on reference data and invalid values set to NaN, but no rows will be deleted.
142
- - `preprocessing = 2`: Missing columns will be added, and all rows containing missing, duplicated, or invalid values will be deleted. This option is recommended before running genetic methods.
145
+ - `preprocessing = 'None'`: The data won't be modified.
146
+ - `preprocessing = 'Fill'`: Missing columns will be added based on reference data and invalid values set to NaN, but no rows will be deleted.
147
+ - `preprocessing = 'Fill_delete'`: Missing columns will be added, and all rows containing missing, duplicated, or invalid values will be deleted. This option is recommended before running genetic methods.
143
148
 
144
- By default, and depending on the global preprocessing level (0, 1, 2) chosen, the `preprocess_data` method of `genal.Geno` will run the following checks:
149
+ By default, and depending on the global preprocessing level ('None', 'Fill', 'Fill_delete') chosen, the `preprocess_data` method of `genal.Geno` will run the following checks:
145
150
  - Ensure the CHR (chromosome) and POS (genomic position) columns are integers.
146
151
  - Ensure the EA (effect allele) and NEA (non-effect allele) columns are uppercase characters containing A, T, C, G letters. Multiallelic values are set to NaN.
147
152
  - Validate the P (p-value) column for proper values.
@@ -180,7 +185,7 @@ And we see that the SNP column with the rsids has been added based on the refere
180
185
  You do not need to obtain the 1000 genome reference panel yourself, Genal will download it the first time you use it. By default, the reference panel used is the european (eur) one. You can specify another valid reference panel (afr, eas, sas, amr) with the reference_panel argument:
181
186
 
182
187
  ```python
183
- SBP_Geno.preprocess_data(preprocessing = 2, reference_panel = "afr")
188
+ SBP_Geno.preprocess_data(preprocessing = 'Fill_delete', reference_panel = "afr")
184
189
  ```
185
190
 
186
191
  You can also use a custom reference panel by specifying to the reference_panel argument a path to bed/bim/fam files (without the extension).
@@ -344,7 +349,7 @@ Stroke_Geno = genal.Geno(stroke_gwas, CHR = "chromosome", POS = "base_pair_locat
344
349
  We preprocess it as well to put it in the correct format and make sure there are no invalid values:
345
350
 
346
351
  ```python
347
- Stroke_Geno.preprocess_data(preprocessing = 2)
352
+ Stroke_Geno.preprocess_data(preprocessing = 'Fill_delete')
348
353
  ```
349
354
 
350
355
  Now, we need to extract our instruments (SNPs of the SBP_clumped data) from the outcome data to obtain their association with the outcome trait (stroke). It can be done by calling the `genal.Geno.query_outcome` method:
@@ -409,8 +414,16 @@ By default, all MR methods (inverse-variance weighted, weighted median, MR-Egger
409
414
 
410
415
  For more fine-tuning, such as settings for the number of bootstrapping iterations, please refer to the API.
411
416
 
412
- If you wish to include the heterogeneity values (Cochran's Q), you can use the heterogeneity argument. Here, the heterogeneity for the inverse-variance weighted method:
417
+ If you want to visualize the obtained MR results, you can use the `genal.Geno.MR_plot` method that will plot each SNP in an effect_on_exposure x effect_on_outcome plane as well as lines corresponding to different MR methods:
418
+
419
+ ```python
420
+ SBP_clumped.MR_plot(filename="MR_plot_SBP_AS")
421
+ ```
422
+
423
+ ![MR plot](docs/Images/MR_plot_SBP_AS.png)
424
+ You can select which MR methods you wish to plot with the 'methods' argument. Note that for an MR method to be plotted, it must be included in the latest `genal.Geno.MR` call of this `genal.Geno` object.
413
425
 
426
+ If you wish to include the heterogeneity values (Cochran's Q) in the results, you can use the heterogeneity argument in the `genal.Geno.MR` call. Here, the heterogeneity for the inverse-variance weighted method:
414
427
 
415
428
  ```python
416
429
  SBP_clumped.MR(action = 3, methods = ["IVW"], exposure_name = "SBP", outcome_name = "Stroke_eur", heterogeneity = True)
@@ -167,7 +167,7 @@ class Geno:
167
167
 
168
168
  def preprocess_data(
169
169
  self,
170
- preprocessing=1,
170
+ preprocessing='Fill',
171
171
  reference_panel="eur",
172
172
  effect_column=None,
173
173
  keep_multi=None,
@@ -179,11 +179,11 @@ class Geno:
179
179
  Clean and preprocess the main dataframe of Single Nucleotide Polymorphisms (SNP) data.
180
180
 
181
181
  Args:
182
- preprocessing (int, optional): Level of preprocessing to apply. Options include:
183
- - 0: The dataframe is not modified.
184
- - 1: Missing columns are added based on reference data and invalid values set to NaN, but no rows are deleted.
185
- - 2: Missing columns are added, and rows with missing, duplicated, or invalid values are deleted.
186
- Defaults to 1.
182
+ preprocessing (str, optional): Level of preprocessing to apply. Options include:
183
+ - "None": The dataframe is not modified.
184
+ - "Fill": Missing columns are added based on reference data and invalid values set to NaN, but no rows are deleted.
185
+ - "Fill_delete": Missing columns are added, and rows with missing, duplicated, or invalid values are deleted.
186
+ Defaults to 'Fill'.
187
187
  reference_panel (str or pd.DataFrame, optional): Reference panel for SNP adjustments. Can be a string representing ancestry classification ("eur", "afr", "eas", "sas", "amr") or a DataFrame with ["CHR","SNP","POS","A1","A2"] columns or a path to a .bim file. Defaults to "eur".
188
188
  effect_column (str, optional): Specifies the type of effect column ("BETA" or "OR"). If None, the method tries to determine it. Odds Ratios will be log-transformed and the standard error adjusted. Defaults to None.
189
189
  keep_multi (bool, optional): Determines if multiallelic SNPs should be kept. If None, defers to preprocessing value. Defaults to None.
@@ -207,7 +207,7 @@ class Geno:
207
207
 
208
208
  # Ensure CHR and POS columns are integers if preprocessing is enabled
209
209
  for int_col in ["CHR", "POS"]:
210
- if int_col in data.columns and preprocessing > 0:
210
+ if int_col in data.columns and preprocessing in ['Fill', 'Fill_delete']:
211
211
  check_int_column(data, int_col)
212
212
  self.checks[int_col] = True
213
213
 
@@ -237,7 +237,7 @@ class Geno:
237
237
  and "NEA" not in data.columns
238
238
  and "EA" in data.columns
239
239
  )
240
- if missing_nea_condition and preprocessing > 0:
240
+ if missing_nea_condition and preprocessing in ['Fill', 'Fill_delete']:
241
241
  data = fill_nea(data, self.get_reference_panel(reference_panel))
242
242
 
243
243
  # Fill missing EA and NEA columns from reference data if necessary and preprocessing is enabled
@@ -247,7 +247,7 @@ class Geno:
247
247
  and "NEA" not in data.columns
248
248
  and "EA" not in data.columns
249
249
  )
250
- if missing_ea_nea_condition and preprocessing > 0:
250
+ if missing_ea_nea_condition and preprocessing in ['Fill', 'Fill_delete']:
251
251
  data = fill_ea_nea(data, self.get_reference_panel(reference_panel))
252
252
 
253
253
  # Convert effect column to Beta estimates if present
@@ -256,18 +256,18 @@ class Geno:
256
256
  self.checks["BETA"] = True
257
257
 
258
258
  # Ensure P column contains valid values
259
- if "P" in data.columns and preprocessing > 0:
259
+ if "P" in data.columns and preprocessing in ['Fill', 'Fill_delete']:
260
260
  check_p_column(data)
261
261
  self.checks["P"] = True
262
262
 
263
263
  # Fill missing SE or P columns if necessary
264
- if preprocessing > 0:
264
+ if preprocessing in ['Fill', 'Fill_delete']:
265
265
  fill_se_p(data)
266
266
 
267
267
  # Process allele columns
268
268
  for allele_col in ["EA", "NEA"]:
269
269
  check_allele_condition = (allele_col in data.columns) and (
270
- (preprocessing > 0) or (not keep_multi)
270
+ (preprocessing in ['Fill', 'Fill_delete']) or (not keep_multi)
271
271
  )
272
272
  if check_allele_condition:
273
273
  check_allele_column(data, allele_col, keep_multi)
@@ -285,8 +285,8 @@ class Geno:
285
285
  f"Warning: the data doesn't include a {column} column. This may become an issue later on."
286
286
  )
287
287
 
288
- # Remove missing values if preprocessing level is set to 2
289
- if preprocessing == 2:
288
+ # Remove missing values if preprocessing level is set to 'Fill_delete'
289
+ if preprocessing == 'Fill_delete':
290
290
  remove_na(data)
291
291
  self.checks["NA_removal"] = True
292
292
 
@@ -547,7 +547,7 @@ class Geno:
547
547
  if "EA" not in self.checks:
548
548
  check_allele_column(data_prs, "EA", keep_multi=False)
549
549
  if "BETA" not in self.checks:
550
- check_beta_column(data_prs, effect_column=None, preprocessing=2)
550
+ check_beta_column(data_prs, effect_column=None, preprocessing='Fill_delete')
551
551
 
552
552
  initial_rows = data_prs.shape[0]
553
553
  data_prs.dropna(subset=["SNP", "P", "BETA"], inplace=True)
@@ -3,6 +3,8 @@ import json
3
3
  from .tools import default_config, write_config, set_plink
4
4
  from .geno_tools import delete_tmp
5
5
 
6
+ __version__ = "0.1"
7
+
6
8
  config_dir = os.path.expanduser(
7
9
  "~/.genal/"
8
10
  ) # Don't forget to change the config_path in tools.py
@@ -21,7 +21,7 @@ def remove_na(data):
21
21
  n_del = nrows - data.shape[0]
22
22
  if n_del > 0:
23
23
  print(
24
- f"Deleted {n_del}({n_del/nrows*100:.3f}%) rows containing NA values in columns {columns_na}. Use preprocessing = 1 to keep the rows containing NA values."
24
+ f"Deleted {n_del}({n_del/nrows*100:.3f}%) rows containing NA values in columns {columns_na}. Use preprocessing = 'Fill' to keep the rows containing NA values."
25
25
  )
26
26
  return
27
27
 
@@ -100,7 +100,7 @@ def check_beta_column(data, effect_column, preprocessing):
100
100
  If no effect_column argument is specified, determine if the BETA column are beta estimates or odds ratios.
101
101
  """
102
102
  if effect_column is None:
103
- if preprocessing == 0:
103
+ if preprocessing == 'None':
104
104
  return data
105
105
  median = np.median(data.BETA)
106
106
  if 0.5 < median < 1.5:
@@ -303,9 +303,9 @@ def check_arguments(
303
303
  """
304
304
 
305
305
  # Validate preprocessing value
306
- if preprocessing not in [0, 1, 2]:
306
+ if preprocessing not in ['None', 'Fill', 'Fill_delete']:
307
307
  raise ValueError(
308
- "preprocessing must be one of [0, 1, 2]. Refer to the Geno class docstring for details."
308
+ "preprocessing must be one of ['None', 'Fill', 'Fill_delete']. Refer to the Geno class docstring for details."
309
309
  )
310
310
 
311
311
  # Validate effect_column value
@@ -326,11 +326,11 @@ def check_arguments(
326
326
  # Helper functions for preprocessing logic
327
327
  def keeptype_column(arg):
328
328
  """Helper function to decide whether to keep multi-values/duplicates."""
329
- return True if arg is None and preprocessing < 2 else arg
329
+ return True if arg is None and preprocessing in ['None', 'Fill'] else arg
330
330
 
331
331
  def filltype_column(arg):
332
332
  """Helper function to decide whether to fill snpids/coordinates."""
333
- return False if arg is None and preprocessing == 0 else arg
333
+ return False if arg is None and preprocessing == 'None' else arg
334
334
 
335
335
  # Apply preprocessing logic
336
336
  keep_multi = keeptype_column(keep_multi)
@@ -385,7 +385,7 @@ def save_data(data, name, path="", fmt="h5", sep="\t", header=True):
385
385
  print(f"Data saved to {path_name}")
386
386
 
387
387
 
388
- def Combine_Geno(Gs, name="noname", clumped=False, preprocessing=0):
388
+ def Combine_Geno(Gs, name="noname", clumped=False, preprocessing='None'):
389
389
  """
390
390
  Combine a list of GWAS objects into one.
391
391
 
@@ -393,7 +393,7 @@ def Combine_Geno(Gs, name="noname", clumped=False, preprocessing=0):
393
393
  - Gs (list): List of GWAS objects.
394
394
  - name (str, optional): Name for the combined object. Default is "noname".
395
395
  - clumped (bool, optional): If True, uses the clumped data of each object. Default is False.
396
- - preprocessing (int, optional): Level of preprocessing to apply. Default is 0.
396
+ - preprocessing (str, optional): Level of preprocessing to apply. Default is 'None'.
397
397
 
398
398
  Returns:
399
399
  Geno object: Combined Geno object.
@@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"
4
4
 
5
5
  [project]
6
6
  name = "genal-python" # Updated name for PyPI
7
- version = "0.0.dev0"
7
+ version = "0.1"
8
8
  authors = [{name = "Cyprien Rivier", email = "riviercyprien@gmail.com"}]
9
9
  description = "A python toolkit for polygenic risk scoring and mendelian randomization."
10
10
  readme = "README.md"
File without changes
File without changes
File without changes
File without changes