genal-python 1.2.7__tar.gz → 1.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {genal_python-1.2.7 → genal_python-1.2.9}/PKG-INFO +1 -1
  2. {genal_python-1.2.7 → genal_python-1.2.9}/genal/Geno.py +107 -14
  3. {genal_python-1.2.7 → genal_python-1.2.9}/genal/MR_tools.py +4 -1
  4. {genal_python-1.2.7 → genal_python-1.2.9}/genal/__init__.py +1 -1
  5. genal_python-1.2.9/genal/colocalization.py +159 -0
  6. {genal_python-1.2.7 → genal_python-1.2.9}/genal/extract_prs.py +1 -1
  7. {genal_python-1.2.7 → genal_python-1.2.9}/genal/proxy.py +2 -2
  8. {genal_python-1.2.7 → genal_python-1.2.9}/pyproject.toml +1 -1
  9. {genal_python-1.2.7 → genal_python-1.2.9}/.DS_Store +0 -0
  10. {genal_python-1.2.7 → genal_python-1.2.9}/.gitignore +0 -0
  11. {genal_python-1.2.7 → genal_python-1.2.9}/.readthedocs.yaml +0 -0
  12. {genal_python-1.2.7 → genal_python-1.2.9}/Genal_flowchart.png +0 -0
  13. {genal_python-1.2.7 → genal_python-1.2.9}/LICENSE +0 -0
  14. {genal_python-1.2.7 → genal_python-1.2.9}/README.md +0 -0
  15. {genal_python-1.2.7 → genal_python-1.2.9}/docs/.DS_Store +0 -0
  16. {genal_python-1.2.7 → genal_python-1.2.9}/docs/Makefile +0 -0
  17. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/.DS_Store +0 -0
  18. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/.buildinfo +0 -0
  19. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/.doctrees/api.doctree +0 -0
  20. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/.doctrees/environment.pickle +0 -0
  21. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/.doctrees/genal.doctree +0 -0
  22. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/.doctrees/index.doctree +0 -0
  23. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/.doctrees/introduction.doctree +0 -0
  24. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/.doctrees/modules.doctree +0 -0
  25. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_images/MR_plot_SBP_AS.png +0 -0
  26. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/genal/Geno.html +0 -0
  27. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/genal/MR.html +0 -0
  28. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/genal/MR_tools.html +0 -0
  29. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/genal/MRpresso.html +0 -0
  30. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/genal/association.html +0 -0
  31. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/genal/clump.html +0 -0
  32. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/genal/extract_prs.html +0 -0
  33. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/genal/geno_tools.html +0 -0
  34. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/genal/lift.html +0 -0
  35. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/genal/proxy.html +0 -0
  36. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/genal/snp_query.html +0 -0
  37. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/genal/tools.html +0 -0
  38. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_modules/index.html +0 -0
  39. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_sources/api.rst.txt +0 -0
  40. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_sources/genal.rst.txt +0 -0
  41. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_sources/index.rst.txt +0 -0
  42. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_sources/introduction.rst.txt +0 -0
  43. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_sources/modules.rst.txt +0 -0
  44. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/basic.css +0 -0
  45. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/badge_only.css +0 -0
  46. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/Roboto-Slab-Bold.woff +0 -0
  47. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/Roboto-Slab-Bold.woff2 +0 -0
  48. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/Roboto-Slab-Regular.woff +0 -0
  49. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/Roboto-Slab-Regular.woff2 +0 -0
  50. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/fontawesome-webfont.eot +0 -0
  51. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/fontawesome-webfont.svg +0 -0
  52. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/fontawesome-webfont.ttf +0 -0
  53. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/fontawesome-webfont.woff +0 -0
  54. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/fontawesome-webfont.woff2 +0 -0
  55. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/lato-bold-italic.woff +0 -0
  56. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/lato-bold-italic.woff2 +0 -0
  57. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/lato-bold.woff +0 -0
  58. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/lato-bold.woff2 +0 -0
  59. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/lato-normal-italic.woff +0 -0
  60. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/lato-normal-italic.woff2 +0 -0
  61. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/lato-normal.woff +0 -0
  62. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/fonts/lato-normal.woff2 +0 -0
  63. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/css/theme.css +0 -0
  64. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/doctools.js +0 -0
  65. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/documentation_options.js +0 -0
  66. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/file.png +0 -0
  67. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/js/badge_only.js +0 -0
  68. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/js/html5shiv-printshiv.min.js +0 -0
  69. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/js/html5shiv.min.js +0 -0
  70. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/js/theme.js +0 -0
  71. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/language_data.js +0 -0
  72. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/minus.png +0 -0
  73. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/plus.png +0 -0
  74. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/pygments.css +0 -0
  75. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/searchtools.js +0 -0
  76. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/_static/sphinx_highlight.js +0 -0
  77. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/api.html +0 -0
  78. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/genal.html +0 -0
  79. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/genindex.html +0 -0
  80. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/index.html +0 -0
  81. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/introduction.html +0 -0
  82. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/modules.html +0 -0
  83. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/objects.inv +0 -0
  84. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/py-modindex.html +0 -0
  85. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/search.html +0 -0
  86. {genal_python-1.2.7 → genal_python-1.2.9}/docs/build/searchindex.js +0 -0
  87. {genal_python-1.2.7 → genal_python-1.2.9}/docs/make.bat +0 -0
  88. {genal_python-1.2.7 → genal_python-1.2.9}/docs/requirements.txt +0 -0
  89. {genal_python-1.2.7 → genal_python-1.2.9}/docs/source/.DS_Store +0 -0
  90. {genal_python-1.2.7 → genal_python-1.2.9}/docs/source/Images/Genal_flowchart.png +0 -0
  91. {genal_python-1.2.7 → genal_python-1.2.9}/docs/source/Images/MR_plot_SBP_AS.png +0 -0
  92. {genal_python-1.2.7 → genal_python-1.2.9}/docs/source/Images/genal_logo.png +0 -0
  93. {genal_python-1.2.7 → genal_python-1.2.9}/docs/source/api.rst +0 -0
  94. {genal_python-1.2.7 → genal_python-1.2.9}/docs/source/conf.py +0 -0
  95. {genal_python-1.2.7 → genal_python-1.2.9}/docs/source/index.rst +0 -0
  96. {genal_python-1.2.7 → genal_python-1.2.9}/docs/source/introduction.rst +0 -0
  97. {genal_python-1.2.7 → genal_python-1.2.9}/docs/source/modules.rst +0 -0
  98. {genal_python-1.2.7 → genal_python-1.2.9}/genal/MR.py +0 -0
  99. {genal_python-1.2.7 → genal_python-1.2.9}/genal/MRpresso.py +0 -0
  100. {genal_python-1.2.7 → genal_python-1.2.9}/genal/association.py +0 -0
  101. {genal_python-1.2.7 → genal_python-1.2.9}/genal/clump.py +0 -0
  102. {genal_python-1.2.7 → genal_python-1.2.9}/genal/constants.py +0 -0
  103. {genal_python-1.2.7 → genal_python-1.2.9}/genal/geno_tools.py +0 -0
  104. {genal_python-1.2.7 → genal_python-1.2.9}/genal/lift.py +0 -0
  105. {genal_python-1.2.7 → genal_python-1.2.9}/genal/snp_query.py +0 -0
  106. {genal_python-1.2.7 → genal_python-1.2.9}/genal/tools.py +0 -0
  107. {genal_python-1.2.7 → genal_python-1.2.9}/genal_logo.png +0 -0
  108. {genal_python-1.2.7 → genal_python-1.2.9}/gitignore +0 -0
  109. {genal_python-1.2.7 → genal_python-1.2.9}/readthedocs.yaml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: genal-python
3
- Version: 1.2.7
3
+ Version: 1.2.9
4
4
  Summary: A python toolkit for polygenic risk scoring and mendelian randomization.
5
5
  Author-email: Cyprien Rivier <riviercyprien@gmail.com>
6
6
  Requires-Python: >=3.8
@@ -36,6 +36,7 @@ from .association import set_phenotype_func, association_test_func_plink2
36
36
  from .extract_prs import extract_snps_func, prs_func
37
37
  from .snp_query import async_query_gwas_catalog
38
38
  from .constants import STANDARD_COLUMNS, REF_PANEL_COLUMNS, CHECKS_DICT, MR_METHODS_NAMES
39
+ from .colocalization import coloc_abf_func
39
40
 
40
41
  # Do all the MR steps (query_outcome, harmonize etc) based on CHR/POS and not SNPs
41
42
  # Consider reference panels in build 38
@@ -450,11 +451,6 @@ class Geno:
450
451
 
451
452
  # Update the SNP column
452
453
  data['SNP'] = data['SNP_new'].fillna(data['SNP'])
453
- #n_absent = data['SNP_new'].isna().sum()
454
- #if n_absent > 0:
455
- # print(f"{n_absent}({n_absent/data.shape[0]*100:.3f}%) are not present in the genetic data.")
456
- #else:
457
- # print("All SNPs are present in the genetic data.")
458
454
  data.drop(columns = ['SNP_new'], inplace=True)
459
455
 
460
456
  if replace: self.data = data #Update attribute if replace argument
@@ -495,9 +491,9 @@ class Geno:
495
491
  path=None,
496
492
  proxy=False,
497
493
  reference_panel="eur",
498
- kb=5000,
494
+ kb=10000,
499
495
  r2=0.6,
500
- window_snps=5000,
496
+ window_snps=10000,
501
497
 
502
498
  ):
503
499
  """
@@ -767,9 +763,9 @@ class Geno:
767
763
  name=None,
768
764
  proxy=True,
769
765
  reference_panel="eur",
770
- kb=5000,
766
+ kb=10000,
771
767
  r2=0.6,
772
- window_snps=5000,
768
+ window_snps=10000,
773
769
  ):
774
770
  """
775
771
  Prepares dataframes required for Mendelian Randomization (MR) with the SNP information in `data` as exposure.
@@ -896,19 +892,19 @@ class Geno:
896
892
  self.cpus,
897
893
  subset_data
898
894
  )
899
-
900
- self.MR_results = (res, df_mr, exposure_name, outcome_name)
901
895
 
902
896
  if not heterogeneity:
903
897
  res = res.loc[:,["exposure", "outcome", "method", "nSNP", "b", "se", "pval"]]
904
-
905
- if odds:
898
+
899
+ if odds and not res.empty:
906
900
  # Calculate odds ratios and confidence intervals using .loc
907
901
  res.loc[:,'OR_95CI'] = res.apply(lambda row:
908
902
  f"{np.exp(row['b']):.3f} ({np.exp(row['b'] - 1.96*row['se']):.3f}-{np.exp(row['b'] + 1.96*row['se']):.3f})"
909
903
  if not pd.isna(row['b']) and not pd.isna(row['se'])
910
904
  else np.nan, axis=1)
911
-
905
+
906
+ self.MR_results = (res, df_mr, exposure_name, outcome_name)
907
+
912
908
  return res
913
909
 
914
910
  def MR_plot(
@@ -1187,6 +1183,103 @@ class Geno:
1187
1183
 
1188
1184
  return mod_table, GlobalTest, OutlierTest, BiasTest
1189
1185
 
1186
+ def colocalize(self, outcome, method="abf", trait1_type=None, trait2_type=None,
1187
+ sdY1=None, sdY2=None, n1=None, n2=None, p1=1e-4, p2=1e-4, p12=1e-5):
1188
+ """
1189
+ Perform colocalization analysis between two GWAS datasets.
1190
+
1191
+ Args:
1192
+ outcome: Another Geno object containing the outcome dataset
1193
+ method: Method to use for colocalization (default: "abf")
1194
+ trait1_type: Type of exposure trait ("quant" or "cc")
1195
+ trait2_type: Type of outcome trait ("quant" or "cc")
1196
+ sdY1: Standard deviation of exposure trait (required for quantitative traits)
1197
+ sdY2: Standard deviation of outcome trait (required for quantitative traits)
1198
+ n1: Sample size for exposure (used to estimate sdY1 if not provided)
1199
+ n2: Sample size for outcome (used to estimate sdY2 if not provided)
1200
+ p1: Prior probability SNP associated with exposure
1201
+ p2: Prior probability SNP associated with outcome
1202
+ p12: Prior probability SNP associated with both traits
1203
+ """
1204
+ # Ensure required columns exist in both datasets
1205
+ required_cols = ['BETA', 'SE']
1206
+ for col in required_cols:
1207
+ if col not in self.data.columns:
1208
+ raise ValueError(f"Column {col} must be present in exposure dataset")
1209
+ if col not in outcome.data.columns:
1210
+ raise ValueError(f"Column {col} must be present in outcome dataset")
1211
+
1212
+ if trait1_type is None:
1213
+ print("trait1_type not specified. Assuming trait 1 is a quantitative trait.")
1214
+ trait1_type = "quant"
1215
+ if trait2_type is None:
1216
+ print("trait2_type not specified. Assuming trait 2 is a quantitative trait.")
1217
+ trait2_type = "quant"
1218
+
1219
+ # Make copies of the data to avoid modifying the original data
1220
+ data1 = self.data.copy()
1221
+ data2 = outcome.data.copy()
1222
+
1223
+ # Ensure that the BETA columns are preprocessed
1224
+ check_beta_column(data1, 'BETA', 'Fill')
1225
+ check_beta_column(data2, 'BETA', 'Fill')
1226
+
1227
+ # Adjust EAF column names before merging in case one of the datasets does not have it
1228
+ if 'EAF' in data1.columns:
1229
+ data1.rename(columns={'EAF': 'EAF_1'}, inplace=True)
1230
+ if 'EAF' in data2.columns:
1231
+ data2.rename(columns={'EAF': 'EAF_2'}, inplace=True)
1232
+
1233
+ # Determine merge strategy based on available columns
1234
+ if all(col in self.data.columns for col in ['CHR', 'POS']) and \
1235
+ all(col in outcome.data.columns for col in ['CHR', 'POS']):
1236
+ print("Merging datasets using CHR and POS")
1237
+
1238
+ #Ensure that the CHR, POS columns are preprocessed
1239
+ check_int_column(data1, "CHR")
1240
+ check_int_column(data1, "POS")
1241
+ check_int_column(data2, "CHR")
1242
+ check_int_column(data2, "POS")
1243
+
1244
+ # Merge the datasets
1245
+ merged_data = pd.merge(data1, data2,
1246
+ on=['CHR', 'POS'],
1247
+ suffixes=('_1', '_2'))
1248
+
1249
+ elif 'SNP' in self.data.columns and 'SNP' in outcome.data.columns:
1250
+ print("Merging datasets using SNP IDs")
1251
+
1252
+ # Ensure that the SNP column is preprocessed
1253
+ check_snp_column(data1)
1254
+ check_snp_column(data2)
1255
+
1256
+ # Merge the datasets
1257
+ merged_data = pd.merge(data1, data2,
1258
+ on='SNP',
1259
+ suffixes=('_1', '_2'))
1260
+ else:
1261
+ raise ValueError("Either CHR/POS or SNP columns must be present in both datasets for merging")
1262
+
1263
+ # Drop any rows with missing values
1264
+ merged_data = merged_data.dropna()
1265
+ if merged_data.empty:
1266
+ raise ValueError("No overlapping variants found between the datasets")
1267
+
1268
+ print(f"Using {len(merged_data)} overlapping variants for colocalization analysis")
1269
+
1270
+ # Call the implementation function
1271
+ return coloc_abf_func(merged_data,
1272
+ trait1_type=trait1_type,
1273
+ trait2_type=trait2_type,
1274
+ sdY1=sdY1,
1275
+ sdY2=sdY2,
1276
+ n1=n1,
1277
+ n2=n2,
1278
+ p1=p1,
1279
+ p2=p2,
1280
+ p12=p12)
1281
+
1282
+
1190
1283
  def lift(
1191
1284
  self,
1192
1285
  start="hg19",
@@ -287,6 +287,7 @@ def harmonize_MR(df_exposure, df_outcome, action=2, eaf_threshold=0.42):
287
287
  "EAF": "EAF_e",
288
288
  "BETA": "BETA_e",
289
289
  "SE": "SE_e",
290
+ "P": "P_e",
290
291
  },
291
292
  errors="ignore",
292
293
  )
@@ -297,12 +298,14 @@ def harmonize_MR(df_exposure, df_outcome, action=2, eaf_threshold=0.42):
297
298
  "EAF": "EAF_o",
298
299
  "BETA": "BETA_o",
299
300
  "SE": "SE_o",
301
+ "P": "P_o",
300
302
  },
301
303
  errors="ignore",
302
304
  )
305
+
303
306
  df_outcome = df_outcome[
304
307
  df_outcome.columns.intersection(
305
- ["SNP", "EA_o", "NEA_o", "EAF_o", "BETA_o", "SE_o"]
308
+ ["SNP", "EA_o", "NEA_o", "EAF_o", "BETA_o", "SE_o", "P_o"]
306
309
  )
307
310
  ]
308
311
 
@@ -4,7 +4,7 @@ from .tools import default_config, write_config, set_plink, install_plink, delet
4
4
  from .geno_tools import Combine_Geno
5
5
  from .constants import CONFIG_DIR
6
6
 
7
- __version__ = "1.2.7"
7
+ __version__ = "1.2.9"
8
8
 
9
9
  config_path = os.path.join(CONFIG_DIR, "config.json")
10
10
 
@@ -0,0 +1,159 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from numpy import exp, log
4
+
5
+
6
+
7
+
8
+ def coloc_abf_func(data, trait1_type="quant", trait2_type="quant",
9
+ sdY1=None, sdY2=None, n1=None, n2=None,
10
+ p1=1e-4, p2=1e-4, p12=1e-5):
11
+ """
12
+ Perform colocalization analysis between two GWAS datasets using approximate Bayes factors.
13
+
14
+ Args:
15
+ data: DataFrame containing merged GWAS results
16
+ trait1_type: Type of trait 1 ("quant" or "cc")
17
+ trait2_type: Type of trait 2 ("quant" or "cc")
18
+ sdY1: Standard deviation of trait 1 (required for quantitative traits)
19
+ sdY2: Standard deviation of trait 2 (required for quantitative traits)
20
+ n1: Sample size for trait 1 (used to estimate sdY if not provided)
21
+ n2: Sample size for trait 2 (used to estimate sdY if not provided)
22
+ p1: Prior probability SNP associated with trait 1
23
+ p2: Prior probability SNP associated with trait 2
24
+ p12: Prior probability SNP associated with both traits
25
+ """
26
+ # Estimate sdY if not provided for quantitative traits
27
+ if trait1_type == "quant" and sdY1 is None:
28
+ if 'EAF_1' not in data.columns or n1 is None:
29
+ print("Neither sdY1 nor EAF and n1 are provided for trait 1. Assuming sdY1 = 1.")
30
+ sdY1 = 1
31
+ else:
32
+ sdY1 = sdY_est(data['SE_1']**2, data['EAF_1'], n1)
33
+ print(f"Using EAF and n1 to estimate sdY1: {sdY1:.2f}")
34
+
35
+ if trait2_type == "quant" and sdY2 is None:
36
+ if 'EAF_2' not in data.columns or n2 is None:
37
+ print("Neither sdY2 nor EAF and n2 are provided for trait 2. Assuming sdY2 = 1.")
38
+ sdY2 = 1
39
+ else:
40
+ sdY2 = sdY_est(data['SE_2']**2, data['EAF_2'], n2)
41
+ print(f"Using EAF and n2 to estimate sdY2: {sdY2:.2f}")
42
+ # Calculate Bayes factors for each dataset
43
+ lABF_1 = approx_bf_estimates(data['BETA_1'], data['SE_1']**2,
44
+ trait_type=trait1_type, sdY=sdY1)
45
+ lABF_2 = approx_bf_estimates(data['BETA_2'], data['SE_2']**2,
46
+ trait_type=trait2_type, sdY=sdY2)
47
+
48
+ # Adjust priors based on number of SNPs
49
+ n_snps = len(data)
50
+ if n_snps * p1 >= 1:
51
+ p1 = 1 / (n_snps + 1)
52
+ if n_snps * p2 >= 1:
53
+ p2 = 1 / (n_snps + 1)
54
+ if n_snps * p12 >= 1:
55
+ p12 = 1 / (n_snps + 1)
56
+
57
+ # Calculate posterior probabilities
58
+ pp = combine_abf(lABF_1, lABF_2, p1, p2, p12)
59
+
60
+ # Add SNP-specific results
61
+ results_df = data.copy()
62
+ results_df['lABF_1'] = lABF_1
63
+ results_df['lABF_2'] = lABF_2
64
+ results_df['internal.sum.lABF'] = lABF_1 + lABF_2
65
+
66
+ # Calculate SNP-specific PP for H4
67
+ my_denom_log_abf = logsum(results_df['internal.sum.lABF'])
68
+ results_df['SNP.PP.H4'] = np.exp(results_df['internal.sum.lABF'] - my_denom_log_abf)
69
+
70
+ return {
71
+ 'summary': {
72
+ 'nsnps': n_snps,
73
+ **pp
74
+ },
75
+ 'results': results_df,
76
+ 'priors': {
77
+ 'p1': p1,
78
+ 'p2': p2,
79
+ 'p12': p12
80
+ }
81
+ }
82
+
83
+ def approx_bf_estimates(beta, varbeta, trait_type="quant", sdY=1, effect_priors={'quant': 0.15, 'cc': 0.2}):
84
+ """
85
+ Calculate approximate Bayes factors using regression estimates.
86
+
87
+ Args:
88
+ beta: effect size estimate
89
+ varbeta: variance of the effect size estimate
90
+ trait_type: either "quant" for quantitative trait or "cc" for case-control
91
+ sdY: standard deviation of the trait (for quantitative traits)
92
+ effect_priors: dictionary with prior effect sizes for quantitative and case-control traits
93
+
94
+ Returns:
95
+ array: log approximate Bayes factors
96
+ """
97
+ z = beta / np.sqrt(varbeta)
98
+
99
+ # Set prior standard deviation based on trait type
100
+ if trait_type == "quant":
101
+ sd_prior = effect_priors['quant'] * sdY
102
+ else: # case-control
103
+ sd_prior = effect_priors['cc']
104
+
105
+ r = sd_prior**2 / (sd_prior**2 + varbeta)
106
+ lABF = 0.5 * (np.log(1 - r) + (r * z**2))
107
+ return lABF
108
+
109
+ def logsum(x):
110
+ """Calculate log of sum of exponentials"""
111
+ my_max = np.max(x)
112
+ return my_max + np.log(np.sum(np.exp(x - my_max)))
113
+
114
+ def logdiff(x, y):
115
+ """Calculate log of difference of exponentials"""
116
+ my_max = max(x, y)
117
+ return my_max + np.log(exp(x - my_max) - np.exp(y - my_max))
118
+
119
+ def combine_abf(l1, l2, p1, p2, p12):
120
+ """Calculate posterior probabilities for different hypotheses"""
121
+ lsum = l1 + l2
122
+
123
+ lH0_abf = 0
124
+ lH1_abf = np.log(p1) + logsum(l1)
125
+ lH2_abf = np.log(p2) + logsum(l2)
126
+ lH3_abf = np.log(p1) + np.log(p2) + logdiff(logsum(l1) + logsum(l2), logsum(lsum))
127
+ lH4_abf = np.log(p12) + logsum(lsum)
128
+
129
+ all_abf = np.array([lH0_abf, lH1_abf, lH2_abf, lH3_abf, lH4_abf])
130
+ denom_log_abf = logsum(all_abf)
131
+ pp_abf = np.exp(all_abf - denom_log_abf)
132
+
133
+ return {
134
+ 'PP.H0.abf': pp_abf[0],
135
+ 'PP.H1.abf': pp_abf[1],
136
+ 'PP.H2.abf': pp_abf[2],
137
+ 'PP.H3.abf': pp_abf[3],
138
+ 'PP.H4.abf': pp_abf[4]
139
+ }
140
+
141
+ def sdY_est(vbeta, maf, n):
142
+ """
143
+ Estimate trait standard deviation given vectors of variance of coefficients, MAF and sample size.
144
+
145
+ Args:
146
+ vbeta: vector of variance of coefficients
147
+ maf: vector of MAF (same length as vbeta)
148
+ n: sample size
149
+
150
+ Returns:
151
+ float: estimated standard deviation of Y
152
+ """
153
+ oneover = 1/vbeta
154
+ nvx = 2 * n * maf * (1-maf)
155
+ # Fit linear regression through origin
156
+ coef = np.sum(nvx * oneover) / np.sum(oneover**2)
157
+ if coef < 0:
158
+ raise ValueError("Estimated sdY is negative - this can happen with small datasets, or those with errors. A reasonable estimate of sdY is required to continue.")
159
+ return np.sqrt(coef)
@@ -139,7 +139,7 @@ def extract_snps_func(snp_list, name, path=None):
139
139
  #Check that at least 1 variant has been extracted. If not, return "FAILED" to warn downstream functions (prs, association_test)
140
140
  log_path = output_path + ".log"
141
141
  with open(log_path, 'r') as log_file:
142
- if "0 variants remaining" in log_file.read():
142
+ if " 0 variants remaining" in log_file.read():
143
143
  print("None of the provided SNPs were found in the genetic data.")
144
144
  return "FAILED"
145
145
  else:
@@ -193,9 +193,9 @@ def find_proxies(
193
193
  snp_list,
194
194
  searchspace=None,
195
195
  reference_panel="eur",
196
- kb=5000,
196
+ kb=10000,
197
197
  r2=0.6,
198
- window_snps=5000,
198
+ window_snps=10000,
199
199
  threads=1
200
200
  ):
201
201
  """
@@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"
4
4
 
5
5
  [project]
6
6
  name = "genal-python" # Updated name for PyPI
7
- version = "1.2.7"
7
+ version = "1.2.9"
8
8
  authors = [{name = "Cyprien Rivier", email = "riviercyprien@gmail.com"}]
9
9
  description = "A python toolkit for polygenic risk scoring and mendelian randomization."
10
10
  readme = "README.md"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes