genal-python 1.5.0__tar.gz → 1.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {genal_python-1.5.0 → genal_python-1.5.1}/.gitignore +5 -1
  2. genal_python-1.5.0/README.md → genal_python-1.5.1/PKG-INFO +82 -6
  3. genal_python-1.5.0/PKG-INFO → genal_python-1.5.1/README.md +49 -35
  4. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/concepts.md +8 -4
  5. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/methods.md +41 -2
  6. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/workflows.md +69 -2
  7. {genal_python-1.5.0 → genal_python-1.5.1}/genal/Geno.py +228 -152
  8. {genal_python-1.5.0 → genal_python-1.5.1}/genal/MR.py +38 -138
  9. {genal_python-1.5.0 → genal_python-1.5.1}/genal/MR_tools.py +236 -13
  10. {genal_python-1.5.0 → genal_python-1.5.1}/genal/MRpresso.py +82 -68
  11. {genal_python-1.5.0 → genal_python-1.5.1}/genal/__init__.py +1 -1
  12. {genal_python-1.5.0 → genal_python-1.5.1}/genal/association.py +41 -45
  13. {genal_python-1.5.0 → genal_python-1.5.1}/genal/extract_prs.py +9 -2
  14. {genal_python-1.5.0 → genal_python-1.5.1}/genal/geno_tools.py +2 -2
  15. genal_python-1.5.1/genal/plots.py +1088 -0
  16. {genal_python-1.5.0 → genal_python-1.5.1}/pyproject.toml +8 -1
  17. genal_python-1.5.1/pytest.ini +12 -0
  18. {genal_python-1.5.0 → genal_python-1.5.1}/.DS_Store +0 -0
  19. {genal_python-1.5.0 → genal_python-1.5.1}/.readthedocs.yaml +0 -0
  20. {genal_python-1.5.0 → genal_python-1.5.1}/Genal_flowchart.png +0 -0
  21. {genal_python-1.5.0 → genal_python-1.5.1}/LICENSE +0 -0
  22. {genal_python-1.5.0 → genal_python-1.5.1}/docs/.DS_Store +0 -0
  23. {genal_python-1.5.0 → genal_python-1.5.1}/docs/Makefile +0 -0
  24. {genal_python-1.5.0 → genal_python-1.5.1}/docs/make.bat +0 -0
  25. {genal_python-1.5.0 → genal_python-1.5.1}/docs/requirements.txt +0 -0
  26. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/.DS_Store +0 -0
  27. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/Images/Genal_flowchart.png +0 -0
  28. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/Images/MR_plot_SBP_AS.png +0 -0
  29. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/Images/genal_logo.png +0 -0
  30. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/api.md +0 -0
  31. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/conf.py +0 -0
  32. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/faq.md +0 -0
  33. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/index.md +0 -0
  34. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/introduction.md +0 -0
  35. {genal_python-1.5.0 → genal_python-1.5.1}/docs/source/setup.md +0 -0
  36. {genal_python-1.5.0 → genal_python-1.5.1}/genal/clump.py +0 -0
  37. {genal_python-1.5.0 → genal_python-1.5.1}/genal/colocalization.py +0 -0
  38. {genal_python-1.5.0 → genal_python-1.5.1}/genal/constants.py +0 -0
  39. {genal_python-1.5.0 → genal_python-1.5.1}/genal/genes.py +0 -0
  40. {genal_python-1.5.0 → genal_python-1.5.1}/genal/lift.py +0 -0
  41. {genal_python-1.5.0 → genal_python-1.5.1}/genal/proxy.py +0 -0
  42. {genal_python-1.5.0 → genal_python-1.5.1}/genal/snp_query.py +0 -0
  43. {genal_python-1.5.0 → genal_python-1.5.1}/genal/tools.py +0 -0
  44. {genal_python-1.5.0 → genal_python-1.5.1}/genal_logo.png +0 -0
  45. {genal_python-1.5.0 → genal_python-1.5.1}/gitignore +0 -0
@@ -1,14 +1,18 @@
1
+ .DS_Store
1
2
  __pycache__/
3
+ .pytest_cache/
4
+ .coverage
5
+ htmlcov/
2
6
  dist/
3
7
  .ipynb_checkpoints/
4
8
  ipynb_checkpoints/
5
9
  genal/.ipynb_checkpoints/
6
10
  test_data/
7
11
  cursor/
8
- tests/
9
12
  tmp_GENAL/
10
13
  docs/build/
11
14
  docs/_build/
12
15
  REVIEW_REPORT.md
13
16
  TASKS.md
14
17
  code_concatenated
18
+ tests/
@@ -1,3 +1,35 @@
1
+ Metadata-Version: 2.4
2
+ Name: genal-python
3
+ Version: 1.5.1
4
+ Summary: A python toolkit for polygenic risk scoring and mendelian randomization.
5
+ Author-email: Cyprien Rivier <riviercyprien@gmail.com>
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
10
+ Classifier: Operating System :: OS Independent
11
+ License-File: LICENSE
12
+ Requires-Dist: aiohttp>=3.7
13
+ Requires-Dist: nest_asyncio>=1.5
14
+ Requires-Dist: numpy>=1.17.3
15
+ Requires-Dist: pandas>=1.0
16
+ Requires-Dist: plotnine>=0.9
17
+ Requires-Dist: psutil>=5.0
18
+ Requires-Dist: requests>=2.0
19
+ Requires-Dist: pyliftover>=0.4
20
+ Requires-Dist: scikit_learn>=0.24
21
+ Requires-Dist: scipy>=1.7,<1.13
22
+ Requires-Dist: statsmodels>=0.13,<0.15
23
+ Requires-Dist: tqdm>=4.38
24
+ Requires-Dist: wget>=3.0
25
+ Requires-Dist: fastparquet>=0.4
26
+ Requires-Dist: pyarrow>=3.0
27
+ Requires-Dist: pytest>=7.0 ; extra == "test"
28
+ Requires-Dist: pytest-cov ; extra == "test"
29
+ Requires-Dist: pytest-xdist ; extra == "test"
30
+ Project-URL: Home, https://github.com/CypRiv/genal
31
+ Provides-Extra: test
32
+
1
33
  <div align="center">
2
34
  <img src="genal_logo.png" height="80" alt="genal logo" />
3
35
  <h1>genal</h1>
@@ -123,7 +155,7 @@ G_instruments.query_outcome(G_outcome, proxy=True, reference_panel="EUR_37")
123
155
  mr_results = G_instruments.MR(action=2, heterogeneity=True, odds=False)
124
156
 
125
157
  # 5.5) Plot MR results
126
- G_instruments.MR_plot(filename="mr_scatter")
158
+ G_instruments.MR_plot(filename="mr_scatter", figure_size=(10, 6))
127
159
  ```
128
160
  ## Core concept: the `Geno` object
129
161
 
@@ -133,6 +165,7 @@ G_instruments.MR_plot(filename="mr_scatter")
133
165
  - `G.phenotype`: stored after `G.set_phenotype(...)` (phenotype DataFrame + metadata)
134
166
  - `G.MR_data`: stored after `G.query_outcome(...)` (exposure/outcome association tables used by MR)
135
167
  - `G.MR_results`: stored after `G.MR(...)` (results table + harmonized SNP table; used by plotting)
168
+ - `G.MRpresso_subset_data`: stored after `G.MRpresso(...)` (outlier-removed harmonized table)
136
169
 
137
170
  Most methods either:
138
171
  - return a **new `Geno`** object (e.g., `clump()`), or
@@ -147,6 +180,7 @@ Most methods either:
147
180
  - **Align rsIDs to a target genotype dataset**: `Geno.update_snpids(path=..., replace=...)`
148
181
  - **Extract genotype subset**: `Geno.extract_snps(path=...)` → writes extracted files under `tmp_GENAL/`
149
182
  - **Two-sample MR pipeline**: `Geno.query_outcome(...)` → `Geno.MR(...)` (+ `MR_plot`)
183
+ - **Leave-one-out MR**: `Geno.MR_loo(...)` → `Geno.MR_loo_plot(...)` (identify influential variants)
150
184
  - **MR-PRESSO**: `Geno.MRpresso(...)` (parallel; outlier + distortion tests)
151
185
  - **Colocalization**: `Geno.colocalize(...)` (approx Bayes factors; returns posterior probabilities)
152
186
  - **Association testing (individual-level)**: `Geno.set_phenotype(...)` → `Geno.association_test(...)`
@@ -298,7 +332,17 @@ About `action` (palindromic SNP handling during harmonization):
298
332
  Plot the MR scatter:
299
333
 
300
334
  ```python
301
- G_clumped.MR_plot(filename="mr_scatter")
335
+ G_clumped.MR_plot(filename="mr_scatter", figure_size=(10, 6))
336
+ ```
337
+
338
+ You can also draw a funnel plot of single-SNP ratio estimates (Wald ratios):
339
+
340
+ ```python
341
+ G_clumped.MR_funnel(
342
+ methods=["IVW", "WM", "Egger"], # vertical reference lines (optional)
343
+ filename="mr_funnel",
344
+ figure_size=(10, 6),
345
+ )
302
346
  ```
303
347
 
304
348
  ### 6) Sensitivity: MR-PRESSO
@@ -311,7 +355,16 @@ mod_table, GlobalTest, OutlierTest, BiasTest = G_clumped.MRpresso(
311
355
  cpus=-1, # use all CPU cores
312
356
  )
313
357
  ```
314
- If you want to rerun all MR methods after removing outliers with MR-PRESSO, you can use the `use_mrpresso_data=True` argument in `MR()`:
358
+
359
+ To highlight MR-PRESSO outliers on plots, pass `use_mrpresso_data=True` (outliers are colored in red):
360
+
361
+ ```python
362
+ G_clumped.MR_plot(filename="mr_scatter_mrpresso", figure_size=(10, 6), use_mrpresso_data=True)
363
+ G_clumped.MR_funnel(filename="mr_funnel_mrpresso", figure_size=(10, 6), use_mrpresso_data=True)
364
+ G_clumped.MR_loo_plot(filename="loo_forest_mrpresso", figure_size=(10, 8), use_mrpresso_data=True)
365
+ ```
366
+
367
+ If you want to rerun MR methods after removing outliers with MR-PRESSO, you can use the `use_mrpresso_data=True` argument in `MR()`:
315
368
  ```python
316
369
  res = G_clumped.MR(
317
370
  action=2,
@@ -325,7 +378,29 @@ res = G_clumped.MR(
325
378
  res
326
379
  ```
327
380
 
328
- ### 7) Single-SNP association tests (individual-level data)
381
+ ### 7) Sensitivity: Leave-One-Out MR
382
+ Leave-one-out MR helps identify influential variants that may be driving the causal estimate.
383
+
384
+ ```python
385
+ # Run leave-one-out analysis (default uses IVW)
386
+ loo_results = G_clumped.MR_loo(method="IVW", heterogeneity=False, odds=False)
387
+ ```
388
+
389
+ Visualize the results with a forest plot:
390
+
391
+ ```python
392
+ # Default: show top influential instruments
393
+ G_clumped.MR_loo_plot(filename="loo_forest", figure_size=(10, 8))
394
+ ```
395
+
396
+ Tips:
397
+ - `MR_loo` accepts the same `action`, `use_mrpresso_data`, and method parameters as `MR`.
398
+ - `MR_loo_plot` supports `top_influential=True` (default) for a compact figure showing the most influential SNPs, or `top_influential=False` for paginated output with all instruments.
399
+ - `MR_loo_plot(..., use_mrpresso_data=True)` colors MR-PRESSO outliers in red (requires running `MRpresso()` first). When outliers exist, an extra summary row ("MR-PRESSO corrected") is added using the same MR method as the leave-one-out analysis.
400
+ - `MR_loo_plot(..., methods=["WM", "Egger"])` adds extra overall estimates for the requested methods (computed on all instruments).
401
+ - Set `odds=True` in `MR_loo` if you want odds ratio scaling on the plot.
402
+
403
+ ### 8) Single-SNP association tests (individual-level data)
329
404
 
330
405
  Use individual-level data to re-estimate SNP–trait effects in a specific cohort (e.g., different ancestry, different measurement protocol).
331
406
 
@@ -354,7 +429,7 @@ G_adj.association_test(
354
429
 
355
430
  This updates `G_adj.data[["BETA","SE","P"]]` with cohort-specific estimates and recomputes `FSTAT` to be consistent with the updated values.
356
431
 
357
- ### 8) Lift to a different build
432
+ ### 9) Lift to a different build
358
433
 
359
434
  Lift variants between builds (e.g., hg19 → hg38):
360
435
 
@@ -365,7 +440,7 @@ lifted.head()
365
440
 
366
441
  For large datasets, you can provide a UCSC LiftOver executable via `liftover_path`.
367
442
 
368
- ### 9) Query the GWAS Catalog
443
+ ### 10) Query the GWAS Catalog
369
444
 
370
445
  Attach a per-SNP list of associated traits using the GWAS Catalog API:
371
446
 
@@ -520,3 +595,4 @@ If you use methods derived from other packages (e.g., MR-PRESSO), please also ci
520
595
 
521
596
  ## License
522
597
  GPL-3.0-or-later (see `LICENSE`).
598
+
@@ -1,31 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: genal-python
3
- Version: 1.5.0
4
- Summary: A python toolkit for polygenic risk scoring and mendelian randomization.
5
- Author-email: Cyprien Rivier <riviercyprien@gmail.com>
6
- Requires-Python: >=3.8
7
- Description-Content-Type: text/markdown
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
10
- Classifier: Operating System :: OS Independent
11
- License-File: LICENSE
12
- Requires-Dist: aiohttp>=3.7
13
- Requires-Dist: nest_asyncio>=1.5
14
- Requires-Dist: numpy>=1.17.3
15
- Requires-Dist: pandas>=1.0
16
- Requires-Dist: plotnine>=0.9
17
- Requires-Dist: psutil>=5.0
18
- Requires-Dist: requests>=2.0
19
- Requires-Dist: pyliftover>=0.4
20
- Requires-Dist: scikit_learn>=0.24
21
- Requires-Dist: scipy>=1.7,<1.13
22
- Requires-Dist: statsmodels>=0.13,<0.15
23
- Requires-Dist: tqdm>=4.38
24
- Requires-Dist: wget>=3.0
25
- Requires-Dist: fastparquet>=0.4
26
- Requires-Dist: pyarrow>=3.0
27
- Project-URL: Home, https://github.com/CypRiv/genal
28
-
29
1
  <div align="center">
30
2
  <img src="genal_logo.png" height="80" alt="genal logo" />
31
3
  <h1>genal</h1>
@@ -151,7 +123,7 @@ G_instruments.query_outcome(G_outcome, proxy=True, reference_panel="EUR_37")
151
123
  mr_results = G_instruments.MR(action=2, heterogeneity=True, odds=False)
152
124
 
153
125
  # 5.5) Plot MR results
154
- G_instruments.MR_plot(filename="mr_scatter")
126
+ G_instruments.MR_plot(filename="mr_scatter", figure_size=(10, 6))
155
127
  ```
156
128
  ## Core concept: the `Geno` object
157
129
 
@@ -161,6 +133,7 @@ G_instruments.MR_plot(filename="mr_scatter")
161
133
  - `G.phenotype`: stored after `G.set_phenotype(...)` (phenotype DataFrame + metadata)
162
134
  - `G.MR_data`: stored after `G.query_outcome(...)` (exposure/outcome association tables used by MR)
163
135
  - `G.MR_results`: stored after `G.MR(...)` (results table + harmonized SNP table; used by plotting)
136
+ - `G.MRpresso_subset_data`: stored after `G.MRpresso(...)` (outlier-removed harmonized table)
164
137
 
165
138
  Most methods either:
166
139
  - return a **new `Geno`** object (e.g., `clump()`), or
@@ -175,6 +148,7 @@ Most methods either:
175
148
  - **Align rsIDs to a target genotype dataset**: `Geno.update_snpids(path=..., replace=...)`
176
149
  - **Extract genotype subset**: `Geno.extract_snps(path=...)` → writes extracted files under `tmp_GENAL/`
177
150
  - **Two-sample MR pipeline**: `Geno.query_outcome(...)` → `Geno.MR(...)` (+ `MR_plot`)
151
+ - **Leave-one-out MR**: `Geno.MR_loo(...)` → `Geno.MR_loo_plot(...)` (identify influential variants)
178
152
  - **MR-PRESSO**: `Geno.MRpresso(...)` (parallel; outlier + distortion tests)
179
153
  - **Colocalization**: `Geno.colocalize(...)` (approx Bayes factors; returns posterior probabilities)
180
154
  - **Association testing (individual-level)**: `Geno.set_phenotype(...)` → `Geno.association_test(...)`
@@ -326,7 +300,17 @@ About `action` (palindromic SNP handling during harmonization):
326
300
  Plot the MR scatter:
327
301
 
328
302
  ```python
329
- G_clumped.MR_plot(filename="mr_scatter")
303
+ G_clumped.MR_plot(filename="mr_scatter", figure_size=(10, 6))
304
+ ```
305
+
306
+ You can also draw a funnel plot of single-SNP ratio estimates (Wald ratios):
307
+
308
+ ```python
309
+ G_clumped.MR_funnel(
310
+ methods=["IVW", "WM", "Egger"], # vertical reference lines (optional)
311
+ filename="mr_funnel",
312
+ figure_size=(10, 6),
313
+ )
330
314
  ```
331
315
 
332
316
  ### 6) Sensitivity: MR-PRESSO
@@ -339,7 +323,16 @@ mod_table, GlobalTest, OutlierTest, BiasTest = G_clumped.MRpresso(
339
323
  cpus=-1, # use all CPU cores
340
324
  )
341
325
  ```
342
- If you want to rerun all MR methods after removing outliers with MR-PRESSO, you can use the `use_mrpresso_data=True` argument in `MR()`:
326
+
327
+ To highlight MR-PRESSO outliers on plots, pass `use_mrpresso_data=True` (outliers are colored in red):
328
+
329
+ ```python
330
+ G_clumped.MR_plot(filename="mr_scatter_mrpresso", figure_size=(10, 6), use_mrpresso_data=True)
331
+ G_clumped.MR_funnel(filename="mr_funnel_mrpresso", figure_size=(10, 6), use_mrpresso_data=True)
332
+ G_clumped.MR_loo_plot(filename="loo_forest_mrpresso", figure_size=(10, 8), use_mrpresso_data=True)
333
+ ```
334
+
335
+ If you want to rerun MR methods after removing outliers with MR-PRESSO, you can use the `use_mrpresso_data=True` argument in `MR()`:
343
336
  ```python
344
337
  res = G_clumped.MR(
345
338
  action=2,
@@ -353,7 +346,29 @@ res = G_clumped.MR(
353
346
  res
354
347
  ```
355
348
 
356
- ### 7) Single-SNP association tests (individual-level data)
349
+ ### 7) Sensitivity: Leave-One-Out MR
350
+ Leave-one-out MR helps identify influential variants that may be driving the causal estimate.
351
+
352
+ ```python
353
+ # Run leave-one-out analysis (default uses IVW)
354
+ loo_results = G_clumped.MR_loo(method="IVW", heterogeneity=False, odds=False)
355
+ ```
356
+
357
+ Visualize the results with a forest plot:
358
+
359
+ ```python
360
+ # Default: show top influential instruments
361
+ G_clumped.MR_loo_plot(filename="loo_forest", figure_size=(10, 8))
362
+ ```
363
+
364
+ Tips:
365
+ - `MR_loo` accepts the same `action`, `use_mrpresso_data`, and method parameters as `MR`.
366
+ - `MR_loo_plot` supports `top_influential=True` (default) for a compact figure showing the most influential SNPs, or `top_influential=False` for paginated output with all instruments.
367
+ - `MR_loo_plot(..., use_mrpresso_data=True)` colors MR-PRESSO outliers in red (requires running `MRpresso()` first). When outliers exist, an extra summary row ("MR-PRESSO corrected") is added using the same MR method as the leave-one-out analysis.
368
+ - `MR_loo_plot(..., methods=["WM", "Egger"])` adds extra overall estimates for the requested methods (computed on all instruments).
369
+ - Set `odds=True` in `MR_loo` if you want odds ratio scaling on the plot.
370
+
371
+ ### 8) Single-SNP association tests (individual-level data)
357
372
 
358
373
  Use individual-level data to re-estimate SNP–trait effects in a specific cohort (e.g., different ancestry, different measurement protocol).
359
374
 
@@ -382,7 +397,7 @@ G_adj.association_test(
382
397
 
383
398
  This updates `G_adj.data[["BETA","SE","P"]]` with cohort-specific estimates and recomputes `FSTAT` to be consistent with the updated values.
384
399
 
385
- ### 8) Lift to a different build
400
+ ### 9) Lift to a different build
386
401
 
387
402
  Lift variants between builds (e.g., hg19 → hg38):
388
403
 
@@ -393,7 +408,7 @@ lifted.head()
393
408
 
394
409
  For large datasets, you can provide a UCSC LiftOver executable via `liftover_path`.
395
410
 
396
- ### 9) Query the GWAS Catalog
411
+ ### 10) Query the GWAS Catalog
397
412
 
398
413
  Attach a per-SNP list of associated traits using the GWAS Catalog API:
399
414
 
@@ -548,4 +563,3 @@ If you use methods derived from other packages (e.g., MR-PRESSO), please also ci
548
563
 
549
564
  ## License
550
565
  GPL-3.0-or-later (see `LICENSE`).
551
-
@@ -10,6 +10,7 @@ Key attributes (you don't need to manipulate these directly):
10
10
  - `G.phenotype`: set by {py:meth}`genal.Geno.set_phenotype` (phenotype DataFrame + metadata)
11
11
  - `G.MR_data`: set by {py:meth}`genal.Geno.query_outcome` (exposure/outcome tables used by MR)
12
12
  - `G.MR_results`: set by {py:meth}`genal.Geno.MR` (results table + harmonized SNP table; used by plotting)
13
+ - `G.MR_loo_results`: set by {py:meth}`genal.Geno.MR_loo` (leave-one-out results tuple; used by `MR_loo_plot`)
13
14
  - `G.MRpresso_results` / `G.MRpresso_subset_data`: set by {py:meth}`genal.Geno.MRpresso`
14
15
 
15
16
  ## Standard columns
@@ -38,7 +39,7 @@ This is a *practical guide*, not an exhaustive contract. When a method can work
38
39
  | {py:meth}`genal.Geno.clump` | `SNP`, `P` | LD clumping via PLINK; returns a new `Geno` (or `None` if nothing passes). |
39
40
  | {py:meth}`genal.Geno.prs` | `EA`, `BETA`, plus `SNP (or CHR+POS)` | If `CHR+POS` are available, genal will prefer position-based matching to your genotype dataset to reduce ID-mismatch losses. |
40
41
  | {py:meth}`genal.Geno.query_outcome` | `SNP`, `EA`, `NEA`, `BETA`, `SE` (exposure and outcome) | Outcome querying is rsID-based; proxy search is optional. If you plan to use `action=2` later, `EAF` in both datasets is strongly recommended. |
41
- | {py:meth}`genal.Geno.MR` / {py:meth}`genal.Geno.MRpresso` | `MR_data` | Both consume `MR_data` produced by `query_outcome()`. |
42
+ | {py:meth}`genal.Geno.MR` / {py:meth}`genal.Geno.MR_loo` / {py:meth}`genal.Geno.MRpresso` | `MR_data` | All consume `MR_data` produced by `query_outcome()`. |
42
43
  | {py:meth}`genal.Geno.colocalize` | `BETA`, `SE`, plus `CHR+POS` (preferred) **or** `SNP` (in both datasets) | If `EA/NEA` are present in both datasets, effects are allele-aligned; otherwise results assume both GWAS use the same reference allele. For quantitative traits, provide `sdY` or (`EAF` + `n`) to avoid the default `sdY=1` assumption. |
43
44
  | {py:meth}`genal.Geno.update_eaf` | `EA`, plus `CHR+POS` **or** `SNP` | Uses PLINK to compute allele frequencies from a reference panel; coordinate-based matching is faster when available. |
44
45
  | {py:meth}`genal.Geno.filter_by_gene` / {py:meth}`genal.Geno.lift` | `CHR`, `POS` | Genomic coordinate operations. |
@@ -65,8 +66,11 @@ A helpful mental framework:
65
66
  | `association_test()` | `None` | runs PLINK `--glm`; mutates `G.data` (`BETA/SE/P`) |
66
67
  | `query_outcome()` | `None` | sets `G.MR_data` (exposure/outcome tables used by MR) |
67
68
  | `MR()` | `pd.DataFrame` | sets `G.MR_results` and returns the results table |
68
- | `MR_plot()` | plot object | requires `G.MR_results`; writes `.png` if `filename=...` |
69
- | `MRpresso()` | tuple | sets `G.MRpresso_results` and `G.MRpresso_subset_data` (outlier-removed harmonized table) |
69
+ | `MR_plot()` | plot object | requires `G.MR_results`; writes `.png` if `filename=...`; supports `use_mrpresso_data=True` for outlier highlighting |
70
+ | `MR_funnel()` | plot object | requires `G.MR_results`; writes `.png` if `filename=...`; supports `use_mrpresso_data=True` for outlier highlighting |
71
+ | `MR_loo()` | `pd.DataFrame` | sets `G.MR_loo_results` and returns the LOO results table |
72
+ | `MR_loo_plot()` | plot object(s) | requires `G.MR_loo_results`; writes `.png` if `filename=...`; may return a list for multi-page output; supports `methods=[...]` overall rows and `use_mrpresso_data=True` for outlier highlighting |
73
+ | `MRpresso()` | tuple | sets `G.MRpresso_results` and `G.MRpresso_subset_data` (outlier-removed harmonized table; SNP-indexed) |
70
74
  | `prs()` | `None` | writes `<name>.csv` and uses PLINK temp files |
71
75
  | `query_gwas_catalog()` | `pd.DataFrame` | adds an `ASSOC` column (network-bound); `replace=True` overwrites `G.data` |
72
76
  | `filter_by_gene(replace=False)` | `Geno` | returns a new `Geno` filtered to a locus |
@@ -80,7 +84,7 @@ Be aware of these common side effects:
80
84
 
81
85
  - `~/.genal/config.json` is created/updated as you configure PLINK, reference folders, or default genotype paths.
82
86
  - `tmp_GENAL/` is used as a scratch directory for PLINK commands and is **not** automatically deleted.
83
- - Some methods generate output files in your current directory (notably `prs()`, and plot saving in `MR_plot()`).
87
+ - Some methods generate output files in your current directory (notably `prs()`, and plot saving in `MR_plot()`, `MR_funnel()`, and `MR_loo_plot()`).
84
88
 
85
89
  ## Resource usage (`ram`, `cpus`)
86
90
 
@@ -132,6 +132,30 @@ The bandwidth uses a modified Silverman rule multiplied by the user-provided fac
132
132
 
133
133
  The sign method tests whether exposure and outcome effects tend to have the same sign across variants. `genal` performs a binomial test against the null of 50% sign agreement.
134
134
 
135
+ ## Leave-one-out MR
136
+
137
+ Implementation: {py:func}`genal.MR_tools.MR_loo_func`, wrapped by {py:meth}`genal.Geno.MR_loo`.
138
+
139
+ Leave-one-out MR iterates over all instruments, sequentially removing each SNP and re-estimating the causal effect using the remaining instruments. This identifies variants that have a disproportionate influence on the overall estimate.
140
+
141
+ For each SNP $i$ among the $J$ instruments:
142
+
143
+ 1. Remove SNP $i$ from the harmonized data.
144
+ 2. Re-run the selected MR method on the remaining $J-1$ SNPs.
145
+ 3. Store the resulting estimate $\hat{\theta}_{-i}$.
146
+
147
+ The "influence" of SNP $i$ is defined as:
148
+
149
+ ```{math}
150
+ \text{influence}_i = \left| \hat{\theta}_{-i} - \hat{\theta}_{\text{all}} \right|
151
+ ```
152
+
153
+ where $\hat{\theta}_{\text{all}}$ is the estimate using all instruments.
154
+
155
+ Notes:
156
+ - Any single MR method can be used (IVW, Egger, weighted median, mode-based, etc.).
157
+ - At least 3 instruments are required (so that each LOO subset has ≥2 instruments).
158
+
135
159
  ## MR-PRESSO (summary)
136
160
 
137
161
  Implementation: {py:func}`genal.MRpresso.mr_presso`, wrapped by {py:meth}`genal.Geno.MRpresso`.
@@ -144,11 +168,26 @@ At a high level, `genal`'s MR-PRESSO implementation:
144
168
  - an outlier test (per-variant p-values, Bonferroni-corrected),
145
169
  - an optional distortion test (whether the causal estimate changes materially after removing outliers).
146
170
 
171
+ ### Distortion test
172
+
173
+ The distortion test assesses whether detected outliers materially bias the causal estimate. `genal` implements the following version:
174
+
175
+ 1. **Observed distortion**: $D_{\text{obs}} = (\hat{\theta}_{\text{all}} - \hat{\theta}_{\text{no outliers}}) / |\hat{\theta}_{\text{no outliers}}|$
176
+ 2. **Expected distortion null**: bootstrap resampling is performed *exclusively on the non-outlier subset*. For each iteration:
177
+ - Sample with replacement $J-k$ SNPs from the non-outlier data (where $J$ is the total number of instruments and $k$ is the number of detected outliers).
178
+ - Fit the IVW model on the sampled data and record $\hat{\theta}_{\text{exp}}$.
179
+ - Compute $D_{\text{exp}} = (\hat{\theta}_{\text{all}} - \hat{\theta}_{\text{exp}}) / |\hat{\theta}_{\text{exp}}|$.
180
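+ 3. **P-value**: $p = \text{mean}(|D_{\text{exp}}| > |D_{\text{obs}}|)$, i.e. the fraction of bootstrap iterations in which the expected distortion exceeds the observed distortion in absolute value.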
181
+
182
+ This differs from the original MR-PRESSO R implementation, which in some cases samples from the full dataset (including outliers) for the expected-bias regressions and is inconsistent with the paper's description.
183
+
184
+ ### Output structure
185
+
147
186
  `Geno.MRpresso()` returns four objects:
148
187
  - `mod_table`: a small results table (`Raw` and `Outlier-corrected` rows; IVW model),
149
188
  - `GlobalTest`: RSS and global p-value,
150
- - `OutlierTest`: per-variant outlier p-values (empty if the global test is not significant),
151
- - `BiasTest`: distortion test result dictionary (empty if distortion test was not run).
189
+ - `OutlierTest`: per-variant outlier p-values (empty if the global test is not significant); SNP IDs as row labels,
190
+ - `BiasTest`: distortion test result dictionary containing `"outliers_indices"` (SNP IDs), `"distortion_test_coefficient"`, and `"distortion_test_p"` (empty if distortion test was not run).
152
191
 
153
192
  If outliers are found, `genal` stores the outlier-removed harmonized table and allows rerunning MR with `Geno.MR(use_mrpresso_data=True)`.
154
193
 
@@ -175,10 +175,20 @@ Key arguments you commonly tune:
175
175
  After `MR()`, you can generate a scatter plot with method lines:
176
176
 
177
177
  ```python
178
- G_instruments.MR_plot(filename="mr_scatter") # saves mr_scatter.png
178
+ G_instruments.MR_plot(filename="mr_scatter", figure_size=(10, 6)) # saves mr_scatter.png
179
179
  ```
180
180
 
181
- ## 6) MR-PRESSO (outlier detection and distortion testing)
181
+ You can also draw a funnel plot of single-SNP ratio estimates (Wald ratios):
182
+
183
+ ```python
184
+ G_instruments.MR_funnel(
185
+ methods=["IVW", "WM", "Egger"], # vertical reference lines (optional)
186
+ filename="mr_funnel",
187
+ figure_size=(10, 6),
188
+ )
189
+ ```
190
+
191
+ ## 6a) MR-PRESSO (outlier detection and distortion testing)
182
192
 
183
193
  {py:meth}`genal.Geno.MRpresso` runs a parallel MR-PRESSO implementation.
184
194
 
@@ -196,14 +206,71 @@ What you typically tune:
196
206
  - `significance_p`: threshold for global/outlier tests.
197
207
  - `outlier_test` / `distortion_test`: disable if you only want the global test.
198
208
 
209
+ Output structure:
210
+ - `OutlierTest`: DataFrame with per-SNP outlier p-values; SNP identifiers (rsIDs) are used as row labels (not numeric indices).
211
+ - `BiasTest`: dictionary containing `"outliers_indices"` (list of SNP IDs), `"distortion_test_coefficient"`, and `"distortion_test_p"`.
212
+
199
213
  If outliers are found, you can rerun MR using the outlier-removed subset:
200
214
 
201
215
  ```python
202
216
  res_no_outliers = G_instruments.MR(use_mrpresso_data=True)
203
217
  ```
204
218
 
219
+ To highlight MR-PRESSO outliers on plots, pass `use_mrpresso_data=True` (outliers are colored in red and shown in the legend):
220
+
221
+ ```python
222
+ G_instruments.MR_plot(filename="mr_scatter_mrpresso", figure_size=(10, 6), use_mrpresso_data=True)
223
+ G_instruments.MR_funnel(filename="mr_funnel_mrpresso", figure_size=(10, 6), use_mrpresso_data=True)
224
+ G_instruments.MR_loo_plot(filename="loo_forest_mrpresso", figure_size=(10, 8), use_mrpresso_data=True)
225
+ ```
226
+
205
227
  See {doc}`methods` for algorithm details and outputs.
206
228
 
229
+ ## 6b) Leave-one-out MR (sensitivity analysis)
230
+
231
+ {py:meth}`genal.Geno.MR_loo` iteratively removes each SNP and re-estimates the causal effect. This helps identify influential variants that may be driving the overall result.
232
+
233
+ ```python
234
+ loo_df = G_instruments.MR_loo(
235
+ method="IVW", # any single MR method key (see MR method map)
236
+ action=2,
237
+ heterogeneity=False, # set True to include Q statistics
238
+ odds=False, # set True for OR-scale output
239
+ )
240
+ ```
241
+
242
+ Key arguments:
243
+ - `method`: a single MR method key (e.g., `"IVW"`, `"Egger"`, `"WM"`); must not be `"all"`.
244
+ - `use_mrpresso_data=True`: use the outlier-removed dataset from MR-PRESSO instead of all instruments.
245
+
246
+ ### Visualizing leave-one-out results
247
+
248
+ {py:meth}`genal.Geno.MR_loo_plot` creates a forest plot from the stored `MR_loo_results`:
249
+
250
+ ```python
251
+ # Default: show top influential instruments
252
+ G_instruments.MR_loo_plot(filename="loo_forest", figure_size=(10, 8))
253
+ ```
254
+
255
+ ```python
256
+ # Or paginate all instruments
257
+ G_instruments.MR_loo_plot(
258
+ top_influential=False, # show all, not just influential
259
+ snps_per_page=30,
260
+ page=1, # or None for all pages
261
+ filename="loo_forest_all",
262
+ figure_size=(10, 12),
263
+ )
264
+ ```
265
+
266
+ Key arguments:
267
+ - `top_influential=True` (default): select the `snps_per_page` most influential SNPs (largest change in estimate when removed) and render a single compact figure.
268
+ - `top_influential=False`: paginate all instruments; use `page=N` to select a specific page or `page=None` to render all pages.
269
+ - `snps_per_page`: number of SNPs per page (minimum 5).
270
+ - `use_mrpresso_data=True`: color MR-PRESSO outliers in red (requires `MRpresso()` first). When outliers exist, an extra summary row ("MR-PRESSO corrected") is added using the same MR method as the leave-one-out analysis.
271
+ - `methods=["WM", "Egger"]`: add extra overall estimates for the requested methods (computed on all instruments).
272
+
273
+
207
274
  ## 7) Additional capabilities (beyond the core pipeline)
208
275
 
209
276
  ### Single-SNP association tests (individual-level data)