gwaslab 3.4.45__py3-none-any.whl → 3.4.46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

gwaslab/__init__.py CHANGED
@@ -44,4 +44,5 @@ from gwaslab.viz_plot_trumpetplot import plot_power
44
44
  from gwaslab.viz_plot_trumpetplot import plot_power_x
45
45
  from gwaslab.util_ex_process_h5 import process_vcf_to_hfd5
46
46
  from gwaslab.util_ex_run_susie import _run_susie_rss as run_susie_rss
47
- from gwaslab.io_read_tabular import _read_tabular as read_tabular
47
+ from gwaslab.io_read_tabular import _read_tabular as read_tabular
48
+ from gwaslab.util_in_meta import meta_analyze
gwaslab/bd_common_data.py CHANGED
@@ -298,6 +298,28 @@ def gtf_to_protein_coding(gtfpath,log=Log(),verbose=True):
298
298
 
299
299
  return protein_coding_path
300
300
 
301
def gtf_to_all_gene(gtfpath, log=Log(), verbose=True):
    """Create (once) a derived GTF containing every record of any "gene" feature.

    The derived file is written next to the source GTF with the suffix
    ``all_genes.gtf.gz`` and its path is returned. If it already exists,
    the existing file is reused and no extraction is performed.
    """
    # "xxx.gtf.gz" -> "xxx.all_genes.gtf.gz" (strip the trailing "gtf.gz", 6 chars)
    derived_path = gtfpath[:-6] + "all_genes.gtf.gz"

    if not path.isfile(derived_path):
        # collect the IDs of all records whose feature type is "gene"
        log.write(" - Extracting genes from {}".format(gtfpath), verbose=verbose)
        annotated = read_gtf(gtfpath, usecols=["feature", "gene_biotype", "gene_id", "gene_name"])
        gene_ids = annotated.loc[annotated["feature"] == "gene", "gene_id"].values
        log.write(" - Loaded {} genes.".format(len(gene_ids)), verbose=verbose)

        # re-read the raw GTF and keep only rows whose gene_id attribute matches
        raw = pd.read_csv(gtfpath, sep="\t", header=None, comment="#", dtype="string")
        raw["_gene_id"] = raw[8].str.extract(r'gene_id "([\w\.-]+)"')
        raw = raw.loc[raw["_gene_id"].isin(gene_ids), :]
        raw = raw.drop("_gene_id", axis=1)

        log.write(" - Extracted records are saved to : {} ".format(derived_path), verbose=verbose)
        raw.to_csv(derived_path, header=None, index=None, sep="\t")

    return derived_path
322
+
301
323
  ####################################################################################################################
302
324
  # From BioPython: https://github.com/biopython/biopython/blob/c5a6b1374267d769b19c1022b4b45472316e78b4/Bio/Seq.py#L36
303
325
  def _maketrans(complement_mapping):
gwaslab/g_Sumstats.py CHANGED
@@ -121,6 +121,7 @@ class Sumstats():
121
121
  snpr2=None,
122
122
  status=None,
123
123
  other=[],
124
+ usekeys=None,
124
125
  direction=None,
125
126
  verbose=True,
126
127
  study="Study_1",
@@ -200,6 +201,7 @@ class Sumstats():
200
201
  trait=trait,
201
202
  status=status,
202
203
  other=other,
204
+ usekeys=usekeys,
203
205
  verbose=verbose,
204
206
  readargs=readargs,
205
207
  log=self.log)
gwaslab/g_version.py CHANGED
@@ -15,8 +15,8 @@ def _get_version():
15
15
  def gwaslab_info():
16
16
  # version meta information
17
17
  dic={
18
- "version":"3.4.45",
19
- "release_date":"20240509"
18
+ "version":"3.4.46",
19
+ "release_date":"20240624"
20
20
  }
21
21
  return dic
22
22
 
@@ -868,8 +868,9 @@ def parallelizeassignrsid(sumstats, path, ref_mode="vcf",snpid="SNPID",rsid="rsI
868
868
  if is_enough_info == False: return sumstats
869
869
  ############################################################################################
870
870
 
871
- standardized_normalized = sumstats["STATUS"].str.match("\w\w\w[0][01234]\w\w", case=False, flags=0, na=False)
872
-
871
+ #standardized_normalized = sumstats["STATUS"].str.match("\w\w\w[0][01234]\w\w", case=False, flags=0, na=False)
872
+ standardized_normalized = sumstats["STATUS"] == sumstats["STATUS"]
873
+
873
874
  if rsid not in sumstats.columns:
874
875
  sumstats[rsid]=pd.Series(dtype="string")
875
876
 
@@ -55,6 +55,7 @@ def preformat(sumstats,
55
55
  trait=None,
56
56
  build=None,
57
57
  other=[],
58
+ usekeys=None,
58
59
  verbose=False,
59
60
  readargs=None,
60
61
  log=None):
@@ -65,6 +66,11 @@ def preformat(sumstats,
65
66
  dtype_dictionary ={}
66
67
 
67
68
  #######################################################################################################################################################
69
+ # workflow:
70
+ # 1. formatbook
71
+ # 2. user specified header
72
+ # 3. usekeys
73
+
68
74
  if fmt is not None:
69
75
  # loading format parameters
70
76
  log.write("Start to load format from formatbook....",verbose=verbose)
@@ -129,6 +135,8 @@ def preformat(sumstats,
129
135
 
130
136
  ################################################
131
137
  for key,value in rename_dictionary.items():
138
+ # check available keys key->raw header
139
+ # usecols : a list of raw headers to load from file/DataFrame
132
140
  if key in raw_cols:
133
141
  usecols.append(key)
134
142
  if value in ["EA","NEA"]:
@@ -137,7 +145,7 @@ def preformat(sumstats,
137
145
  dtype_dictionary[value]="string"
138
146
 
139
147
  except ValueError:
140
- raise ValueError("Please input a path or a pd.DataFrame, and make sure the columns you specified are in the file.")
148
+ raise ValueError("Please input a path or a pd.DataFrame, and make sure the separator is correct and the columns you specified are in the file.")
141
149
 
142
150
  ###################################################################################################################################################
143
151
  ## check columns/datatype to use
@@ -276,6 +284,19 @@ def preformat(sumstats,
276
284
  else:
277
285
  study = raw_cols[9]
278
286
  usecols = usecols + [study]
287
+
288
+ if usekeys is not None:
289
+ # extract only specified keys
290
+ usecols_new =[]
291
+ for i in usekeys:
292
+ for k, v in rename_dictionary.items():
293
+ if i == v:
294
+ usecols_new.append(k)
295
+ usecols_valid =[]
296
+ for i in usecols_new:
297
+ if i in usecols:
298
+ usecols_valid.append(i)
299
+ usecols = usecols_valid
279
300
  #loading data ##########################################################################################################
280
301
 
281
302
  try:
@@ -1061,6 +1061,13 @@ def check_range(sumstats, var_range, header, coltocheck, cols_to_check, log, ver
1061
1061
  if sum(is_low_p) >0:
1062
1062
  log.warning("Extremely low P detected (P=0 or P < minimum positive value of float64) : {}".format(sum(is_low_p)))
1063
1063
  log.warning("Please consider using MLOG10P instead.")
1064
+
1065
+ if header=="INFO":
1066
+ is_high_info = sumstats["INFO"]>1
1067
+ if sum(is_high_info) >0:
1068
+ log.warning("High INFO detected (INFO>1) : {}".format(sum(is_high_info)))
1069
+ log.warning("max(INFO): {}".format(sumstats["INFO"].max()))
1070
+ log.warning("Please check if this is as expected.")
1064
1071
 
1065
1072
  if sum(~is_valid)>0:
1066
1073
  try:
@@ -1102,7 +1109,7 @@ def sanitycheckstats(sumstats,
1102
1109
  HR=(-100,100),
1103
1110
  HR_95L=(0,float("Inf")),
1104
1111
  HR_95U=(0,float("Inf")),
1105
- info=(0,1),
1112
+ info=(0,2),
1106
1113
  float_tolerence = 1e-7,
1107
1114
  verbose=True,
1108
1115
  log=Log()):
@@ -10,6 +10,7 @@ from gwaslab.g_vchange_status import vchange_status
10
10
  from gwaslab.qc_fix_sumstats import sortcoordinate
11
11
  from gwaslab.qc_fix_sumstats import start_to
12
12
  from gwaslab.qc_fix_sumstats import finished
13
+ from gwaslab.qc_fix_sumstats import _process_build
13
14
  from gwaslab.hm_harmonize_sumstats import is_palindromic
14
15
 
15
16
  import gc
@@ -430,8 +431,43 @@ def _filter_snp(sumstats, mode="in", ea="EA",nea="NEA", log=Log(),verbose=True):
430
431
  log.write("Finished filtering SNPs.",verbose=verbose)
431
432
  return snp
432
433
 
433
- def _exclude_hla(sumstats, chrom="CHR", pos="POS", lower=25000000 ,upper=34000000 ,log=Log(), verbose=True):
434
-
434
+ def _exclude_hla(sumstats, chrom="CHR", pos="POS", lower=None ,upper=None, build=None, mode="xmhc", log=Log(), verbose=True):
435
+
436
+ if build is not None:
437
+ build = _process_build(build = build,log = log,verbose = verbose)
438
+ # xMHC : HIST1H2AA ~ 7.6mb ~ RPL12P1
439
+ # reference: Horton, R., Wilming, L., Rand, V., Lovering, R. C., Bruford, E. A., Khodiyar, V. K., ... & Beck, S. (2004). Gene map of the extended human MHC. Nature Reviews Genetics, 5(12), 889-899.
440
+ # hg38: 25,726,063 ~ 33,400,644
441
+ # hg19 : 25,726,291 ~ 33,368,421
442
+
443
+ # HLA : GABBR1 ~ 3.78mb ~ KIFC1
444
+ # reference: Shiina, T., Hosomichi, K., Inoko, H., & Kulski, J. K. (2009). The HLA genomic loci map: expression, interaction, diversity and disease. Journal of human genetics, 54(1), 15-39.
445
+ # hg38: 29,602,238 ~ 33,409,896
446
+ # hg19: 29,570,015 ~ 33,377,673
447
+
448
+ if build == "19":
449
+ if mode =="xmhc":
450
+ lower=25000000
451
+ upper=34000000
452
+ if mode =="hla" or mode =="mhc":
453
+ lower=29500000
454
+ upper=33500000
455
+ if build == "38":
456
+ if mode =="xmhc":
457
+ lower=25000000
458
+ upper=34000000
459
+ if mode =="hla" or mode =="mhc":
460
+ lower=29500000
461
+ upper=33500000
462
+ else:
463
+ # -> 25,000,000 ~ 34,000,000
464
+ if mode =="xmhc":
465
+ lower=25000000
466
+ upper=34000000
467
+ if mode =="hla" or mode =="mhc":
468
+ lower=29500000
469
+ upper=33500000
470
+
435
471
  raw_len = len(sumstats)
436
472
 
437
473
  if str(sumstats[chrom].dtype) == "string":
@@ -11,6 +11,7 @@ from gwaslab.bd_common_data import get_chr_to_number
11
11
  from gwaslab.bd_common_data import get_number_to_chr
12
12
  from gwaslab.bd_common_data import get_chr_to_NC
13
13
  from gwaslab.bd_common_data import gtf_to_protein_coding
14
+ from gwaslab.bd_common_data import gtf_to_all_gene
14
15
  from gwaslab.bd_download import check_and_download
15
16
  from gwaslab.util_ex_gwascatalog import gwascatalog_trait
16
17
  from gwaslab.qc_fix_sumstats import check_dataframe_shape
@@ -38,6 +39,7 @@ def getsig(insumstats,
38
39
  wc_correction=False,
39
40
  build="19",
40
41
  source="ensembl",
42
+ gtf_path=None,
41
43
  verbose=True):
42
44
  """
43
45
  Extract the lead variants using a sliding window. P or MLOG10P will be used and converted to SCALEDP for sorting.
@@ -172,6 +174,7 @@ def getsig(insumstats,
172
174
  xymt=xymt,
173
175
  build=build,
174
176
  source=source,
177
+ gtf_path=gtf_path,
175
178
  verbose=verbose)
176
179
 
177
180
  # drop internal id
@@ -253,6 +256,7 @@ def annogene(
253
256
  xymt=["X","Y","MT"],
254
257
  build="19",
255
258
  source="ensembl",
259
+ gtf_path=None,
256
260
  verbose=True):
257
261
 
258
262
  log.write("Start to annotate variants with nearest gene name(s)...", verbose=verbose)
@@ -267,8 +271,13 @@ def annogene(
267
271
  #| gzip >Homo_sapiens.GRCh37.75.processed.chr.gtf.gz
268
272
 
269
273
  #gtf_path = check_and_download("ensembl_hg19_gtf_protein_coding")
270
- gtf_path = check_and_download("ensembl_hg19_gtf")
271
- gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
274
+ if gtf_path is None:
275
+ gtf_path = check_and_download("ensembl_hg19_gtf")
276
+ gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
277
+ else:
278
+ log.write(" -Using user-provided gtf:{}".format(gtf_path))
279
+ gtf_path = gtf_to_all_gene(gtf_path,log=log,verbose=verbose)
280
+
272
281
  gtf_db_path = gtf_path[:-2]+"db"
273
282
 
274
283
  data = Genome(
@@ -283,8 +292,13 @@ def annogene(
283
292
  elif build=="38":
284
293
  log.write(" -Assigning Gene name using ensembl_hg38_gtf for protein coding genes", verbose=verbose)
285
294
  #gtf_path = check_and_download("ensembl_hg38_gtf_protein_coding")
286
- gtf_path = check_and_download("ensembl_hg38_gtf")
287
- gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
295
+ if gtf_path is None:
296
+ gtf_path = check_and_download("ensembl_hg38_gtf")
297
+ gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
298
+ else:
299
+ log.write(" -Using user-provided gtf:{}".format(gtf_path))
300
+ gtf_path = gtf_to_all_gene(gtf_path,log=log,verbose=verbose)
301
+
288
302
  gtf_db_path = gtf_path[:-2]+"db"
289
303
  data = Genome(
290
304
  reference_name='GRCh38',
@@ -300,8 +314,13 @@ def annogene(
300
314
  if build=="19":
301
315
  log.write(" -Assigning Gene name using NCBI refseq latest GRCh37 for protein coding genes", verbose=verbose)
302
316
  #gtf_path = check_and_download("refseq_hg19_gtf_protein_coding")
303
- gtf_path = check_and_download("refseq_hg19_gtf")
304
- gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
317
+ if gtf_path is None:
318
+ gtf_path = check_and_download("refseq_hg19_gtf")
319
+ gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
320
+ else:
321
+ log.write(" -Using user-provided gtf:{}".format(gtf_path))
322
+ gtf_path = gtf_to_all_gene(gtf_path,log=log,verbose=verbose)
323
+
305
324
  gtf_db_path = gtf_path[:-2]+"db"
306
325
  data = Genome(
307
326
  reference_name='GRCh37',
@@ -315,8 +334,13 @@ def annogene(
315
334
  elif build=="38":
316
335
  log.write(" -Assigning Gene name using NCBI refseq latest GRCh38 for protein coding genes", verbose=verbose)
317
336
  #gtf_path = check_and_download("refseq_hg38_gtf_protein_coding")
318
- gtf_path = check_and_download("refseq_hg38_gtf")
319
- gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
337
+ if gtf_path is None:
338
+ gtf_path = check_and_download("refseq_hg38_gtf")
339
+ gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
340
+ else:
341
+ log.write(" -Using user-provided gtf:{}".format(gtf_path))
342
+ gtf_path = gtf_to_all_gene(gtf_path,log=log,verbose=verbose)
343
+
320
344
  gtf_db_path = gtf_path[:-2]+"db"
321
345
  data = Genome(
322
346
  reference_name='GRCh38',
@@ -0,0 +1,234 @@
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ from scipy.stats.distributions import chi2
5
+ from scipy.stats import norm
6
+ from gwaslab.g_Log import Log
7
+ from gwaslab.io_to_pickle import load_data_from_pickle
8
+ from gwaslab.g_Sumstats import Sumstats
9
+ import gc
10
+
11
def meta_analyze(sumstats_list, random_effects=False, match_allele=True, log=Log()):
    """Run an inverse-variance-weighted (fixed-effect) meta-analysis, optionally
    followed by a DerSimonian-Laird random-effects analysis.

    Parameters
    ----------
    sumstats_list : list
        Datasets to combine; each element may be a file path, a
        (path, read-kwargs dict) tuple, a pd.DataFrame, or a gwaslab Sumstats
        object (see get_sumstats()).
    random_effects : bool, default False
        If True, additionally compute BETA_RANDOM / SE_RANDOM / Z_RANDOM /
        P_RANDOM using a DerSimonian-Laird tau^2 estimate.
    match_allele : bool, default True
        If True, flip BETA/EAF of variants whose EA/NEA are swapped relative to
        the first dataset that reported them, and drop unmatched alleles.
    log : Log
        gwaslab logger.

    Returns
    -------
    pd.DataFrame
        Indexed by SNPID and sorted by CHR/POS, with fixed-effect statistics
        (BETA, SE, Z, P), pooled EAF and N, per-dataset effect DIRECTION
        string, heterogeneity statistics (Q, P_HET, I2_HET) and DOF
        (= number of contributing datasets - 1).
    """
    ###########################################################################
    log.write("Start to perform meta-analysis...")
    log.write(" -Datasets:")
    for index, sumstats_path in enumerate(sumstats_list):
        # paths, DataFrames and Sumstats objects are all logged via their repr
        log.write(" -Sumstats #{}: {} ".format(index, sumstats_path))

    # determine the union of variants; CHR/POS/EA/NEA are taken from the first
    # dataset that reports each SNPID
    log.write(" -Iterating through {} datasets to determine variant list...".format(len(sumstats_list)))
    results_df = pd.DataFrame(columns=["SNPID", "CHR", "POS", "EA", "NEA"])
    for index, sumstats_path in enumerate(sumstats_list):
        sumstats = get_sumstats(sumstats_path, usekeys=["SNPID", "CHR", "POS", "EA", "NEA"])
        new_rows = sumstats.loc[~sumstats["SNPID"].isin(results_df["SNPID"]), ["SNPID", "CHR", "POS", "EA", "NEA"]]
        log.write(" -Sumstats #{}: {} new variants (out of {}) are being added to analysis...".format(index, len(new_rows), len(sumstats)))

        if len(new_rows) > 0:
            if len(results_df) == 0:
                results_df = new_rows
            else:
                results_df = pd.concat([results_df, new_rows], ignore_index=True)
        del sumstats
        del new_rows
        gc.collect()

    ###########################################################################
    log.write(" -Initiating result DataFrame...")
    results_df = results_df.set_index("SNPID")
    results_df["N"] = 0
    results_df["_BETAW_SUM"] = 0.0    # sum of beta_i * w_i
    results_df["_BETA2W_SUM"] = 0.0   # sum of beta_i^2 * w_i (for Cochran's Q)
    results_df["_W_SUM"] = 0.0        # sum of w_i, where w_i = 1/SE_i^2
    results_df["_W2_SUM"] = 0.0       # sum of w_i^2 (for the DL tau^2 estimator)
    results_df["_EA_N"] = 0.0
    results_df["_NEA_N"] = 0.0
    results_df["DIRECTION"] = ""
    results_df["BETA"] = 0.0
    results_df["SE"] = 0.0
    results_df["DOF"] = -1            # becomes (#datasets containing the variant) - 1

    dtype_dict = {
        "_BETAW_SUM": "float64",
        "_EA_N": "float64",
        "_NEA_N": "float64",
        "_BETA2W_SUM": "float64",
        "_W_SUM": "float64",
        "BETA": "float64",
        "SE": "float64",
        "N": "Int64",
        "DOF": "Int64"
    }
    results_df = results_df.astype(dtype_dict)
    ###########################################################################

    log.write(" -Iterating through {} datasets to compute statistics for fixed-effect model...".format(len(sumstats_list)))
    for index, sumstats_path in enumerate(sumstats_list):
        to_use_sumstats = process_sumstats(sumstats_path,
                                           results_df[["EA", "NEA"]],
                                           index=index,
                                           match_allele=match_allele,)
        sumstats_index = to_use_sumstats.index
        results_df_not_in_sumstat_index = results_df.index[~results_df.index.isin(to_use_sumstats.index)]

        # N and DOF
        results_df.loc[sumstats_index, "N"] += to_use_sumstats["N"]
        results_df.loc[sumstats_index, "DOF"] += 1

        # BETA and SE accumulators; w_i = 1/SE_i^2
        w = 1 / (to_use_sumstats["SE"]**2)
        results_df.loc[sumstats_index, "_BETA2W_SUM"] += to_use_sumstats["BETA"]**2 * w
        results_df.loc[sumstats_index, "_BETAW_SUM"] += to_use_sumstats["BETA"] * w
        results_df.loc[sumstats_index, "_W_SUM"] += w
        # BUGFIX: accumulate w_i^2 per dataset. The previous code added the
        # running _W_SUM squared, which overstates sum(w_i^2) for any variant
        # present in more than one dataset and corrupts the DL tau^2 estimate.
        results_df.loc[sumstats_index, "_W2_SUM"] += w**2

        # EAF: pooled as an N-weighted allele count
        results_df.loc[sumstats_index, "_EA_N"] += to_use_sumstats["N"] * to_use_sumstats["EAF"]
        results_df.loc[sumstats_index, "_NEA_N"] += to_use_sumstats["N"] * (1 - to_use_sumstats["EAF"])

        # DIRECTION: one character per dataset (+ / 0 / - ; ? when absent)
        beta_index = to_use_sumstats[to_use_sumstats["BETA"] > 0].index
        results_df.loc[beta_index, "DIRECTION"] += "+"
        beta_index = to_use_sumstats[to_use_sumstats["BETA"] == 0].index
        results_df.loc[beta_index, "DIRECTION"] += "0"
        beta_index = to_use_sumstats[to_use_sumstats["BETA"] < 0].index
        results_df.loc[beta_index, "DIRECTION"] += "-"
        results_df.loc[results_df_not_in_sumstat_index, "DIRECTION"] += "?"

        del to_use_sumstats
        gc.collect()

    ##############################################################################
    # fixed-effect statistics
    results_df["BETA"] = results_df["_BETAW_SUM"] / results_df["_W_SUM"]
    results_df["EAF"] = results_df["_EA_N"] / (results_df["_EA_N"] + results_df["_NEA_N"])
    results_df["SE"] = np.sqrt(1 / results_df["_W_SUM"])
    results_df["Z"] = results_df["BETA"] / results_df["SE"]
    results_df["P"] = norm.sf(abs(results_df["Z"])) * 2
    # Cochran's Q for heterogeneity
    results_df["Q"] = results_df["_BETA2W_SUM"] - (results_df["_BETAW_SUM"]**2 / results_df["_W_SUM"])

    # P_HET: chi2 survival function, evaluated once per distinct DOF
    for dof in results_df["DOF"].unique():
        results_df_dof_index = results_df["DOF"] == dof
        results_df.loc[results_df_dof_index, "P_HET"] = chi2.sf(results_df.loc[results_df_dof_index, "Q"].values, dof)
    gc.collect()

    results_df["I2_HET"] = (results_df["Q"] - results_df["DOF"]) / results_df["Q"]
    results_df.loc[results_df["I2_HET"] < 0, "I2_HET"] = 0

    results_df = results_df.drop(columns=["_EA_N", "_NEA_N"])
    gc.collect()

    ###########################################################################
    if random_effects == True:
        log.write(" -Iterating through {} datasets to compute statistics for random-effects model...".format(len(sumstats_list)))
        # DerSimonian-Laird tau^2 estimate, floored at 0
        results_df["_R2"] = (results_df["Q"] - results_df["DOF"]) / (results_df["_W_SUM"] - (results_df["_W2_SUM"] / results_df["_W_SUM"]))
        results_df.loc[results_df["_R2"] < 0, "_R2"] = 0
        # variants with tau^2 == 0 keep their fixed-effect estimates
        variant_index_random = results_df[results_df["_R2"] > 0].index

        results_df["_BETAW_SUM_R"] = 0.0
        results_df["_W_SUM_R"] = 0.0
        results_df["BETA_RANDOM"] = results_df["BETA"]
        results_df["SE_RANDOM"] = results_df["SE"]

        for index, sumstats_path in enumerate(sumstats_list):
            to_use_sumstats = process_sumstats(sumstats_path,
                                               results_df.loc[variant_index_random, ["EA", "NEA"]],
                                               index=index,
                                               match_allele=match_allele,
                                               extract_index=variant_index_random)

            sumstats_index = to_use_sumstats.index

            # random-effects weights: 1/(SE_i^2 + tau^2)
            results_df.loc[sumstats_index, "_BETAW_SUM_R"] += to_use_sumstats["BETA"] * (1 / (to_use_sumstats["SE"]**2 + results_df.loc[sumstats_index, "_R2"]))
            results_df.loc[sumstats_index, "_W_SUM_R"] += 1 / (to_use_sumstats["SE"]**2 + results_df.loc[sumstats_index, "_R2"])

            del to_use_sumstats
            del sumstats_index
            gc.collect()

        results_df.loc[variant_index_random, "BETA_RANDOM"] = results_df.loc[variant_index_random, "_BETAW_SUM_R"] / results_df.loc[variant_index_random, "_W_SUM_R"]
        results_df.loc[variant_index_random, "SE_RANDOM"] = np.sqrt(1 / results_df.loc[variant_index_random, "_W_SUM_R"])
        results_df["Z_RANDOM"] = results_df["BETA_RANDOM"] / results_df["SE_RANDOM"]
        results_df["P_RANDOM"] = norm.sf(abs(results_df["Z_RANDOM"])) * 2
        results_df = results_df.drop(columns=["_BETAW_SUM_R", "_W_SUM_R"])

        gc.collect()
    ###########################################################################
    # BUGFIX: "_R2" only exists when random_effects=True; errors="ignore"
    # avoids the KeyError the unconditional drop used to raise otherwise.
    results_df = results_df.drop(columns=["_BETAW_SUM", "_BETA2W_SUM", "_W_SUM", "_R2", "_W2_SUM"], errors="ignore").sort_values(by=["CHR", "POS"])
    gc.collect()
    log.write("Finished meta-analysis successfully!")

    return results_df
175
+
176
def process_sumstats(sumstats_path, results_df, index, extract_index=None, match_allele=True, log=Log()):
    """Load one dataset and align its effect alleles to the meta-analysis reference.

    Returns a SNPID-indexed frame of ["EA","NEA","BETA","N","SE","EAF"] restricted
    to variants in extract_index (default: all of results_df), with duplicated
    SNPIDs dropped and, when match_allele is True, swapped-allele records flipped
    (BETA negated, EAF complemented) and unmatched-allele records removed.
    """
    if extract_index is None:
        extract_index = results_df.index

    loaded = get_sumstats(sumstats_path)

    wanted_cols = ["SNPID", "EA", "NEA", "BETA", "N", "SE", "EAF"]
    to_use_sumstats = loaded.loc[loaded["SNPID"].isin(extract_index.values), wanted_cols]

    if len(to_use_sumstats) > 0:
        n_before = len(to_use_sumstats)
        log.write(" -Processing {} variants from sumstats #{}".format(len(to_use_sumstats), index))

        to_use_sumstats = to_use_sumstats.drop_duplicates(subset="SNPID").set_index("SNPID")
        n_dropped = n_before - len(to_use_sumstats)
        if n_dropped > 0:
            log.write(" -Dropping {} duplicated variants from sumstats #{}".format(n_dropped, index))

        if match_allele == True:
            idx = to_use_sumstats.index
            # exact allele match against the reference EA/NEA
            is_match = (to_use_sumstats.loc[idx, "EA"] == results_df.loc[idx, "EA"]) & (to_use_sumstats.loc[idx, "NEA"] == results_df.loc[idx, "NEA"])
            # swapped alleles: this dataset's EA is the reference NEA and vice versa
            is_flip = (to_use_sumstats.loc[idx, "EA"] == results_df.loc[idx, "NEA"]) & (to_use_sumstats.loc[idx, "NEA"] == results_df.loc[idx, "EA"])
            is_to_use = is_match | is_flip

            if sum(~is_to_use) > 0:
                log.write(" -Dropping {} variants with unmatched alleles from sumstats #{}".format(sum(~is_to_use), index))

            # re-orient swapped records so effects refer to the reference EA
            flip_idx = is_flip[is_flip].index
            to_use_sumstats.loc[flip_idx, "BETA"] = -to_use_sumstats.loc[flip_idx, "BETA"]
            to_use_sumstats.loc[flip_idx, "EAF"] = 1 - to_use_sumstats.loc[flip_idx, "EAF"]
            to_use_sumstats = to_use_sumstats.loc[is_to_use[is_to_use].index, :]

    gc.collect()

    return to_use_sumstats
213
+
214
def get_sumstats(input_path, usekeys=None):
    """Normalize the accepted input forms into a plain sumstats DataFrame.

    input_path may be a path string, a (path, read-kwargs dict) tuple, a
    pd.DataFrame, a gwaslab Sumstats object, or a path ending in "pickle".
    When usekeys is given, only those columns are returned.
    """
    if isinstance(input_path, tuple):
        path, path_args = input_path[0], input_path[1]
    else:
        path, path_args = input_path, {}

    if isinstance(path, pd.DataFrame):
        # re-parse through Sumstats so headers/dtypes are standardized
        return Sumstats(path, fmt="auto", verbose=False, usekeys=usekeys, **path_args).data

    if isinstance(path, Sumstats):
        data = path.data
        return data[usekeys] if usekeys is not None else data

    if path[-6:] == "pickle":
        data = load_data_from_pickle(path)
        return data[usekeys] if usekeys is not None else data

    # plain file path: let Sumstats auto-detect the format
    return Sumstats(path, fmt="auto", verbose=False, usekeys=usekeys, **path_args).data
@@ -0,0 +1,58 @@
1
+
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
def snphwe(obs_hets, obs_hom1, obs_hom2):
    """Exact test of Hardy-Weinberg equilibrium for one SNP.

    Python port of Jeremy McRae's C++ implementation
    (https://github.com/jeremymcrae/snphwe/blob/master/src/snp_hwe.cpp), which
    implements Wigginton, Cutler & Abecasis (2005), "A Note on Exact Tests of
    Hardy-Weinberg Equilibrium", AJHG 76:887-893 (original code Jan Wigginton).

    Takes the observed heterozygote count and both homozygote counts; returns
    the exact two-sided HWE p-value, capped at 1.
    """
    minor_homs = min(obs_hom1, obs_hom2)
    major_homs = max(obs_hom1, obs_hom2)

    rare_copies = 2 * minor_homs + obs_hets
    genotypes = obs_hets + major_homs + minor_homs

    # probs[h] = unnormalized probability of observing h heterozygotes
    probs = np.zeros(rare_copies + 1)

    # start the recurrence at the most probable heterozygote count, nudged to
    # share parity with rare_copies so steps of 2 reach every feasible count
    mode = rare_copies * (2 * genotypes - rare_copies) // (2 * genotypes)
    if mode % 2 != rare_copies % 2:
        mode += 1

    probs[mode] = 1.0
    total = 1  # running normalization constant (starts with probs[mode])

    # walk downward from the mode: fewer hets, more homozygotes of each kind
    homr = (rare_copies - mode) // 2
    homc = genotypes - mode - homr
    for hets in range(mode, 1, -2):
        probs[hets - 2] = probs[hets] * hets * (hets - 1.0) / (4.0 * (homr + 1.0) * (homc + 1.0))
        total += probs[hets - 2]
        homr += 1
        homc += 1

    # walk upward from the mode: more hets, fewer homozygotes of each kind
    homr = (rare_copies - mode) // 2
    homc = genotypes - mode - homr
    for hets in range(mode, rare_copies - 1, 2):
        probs[hets + 2] = probs[hets] * 4.0 * homr * homc / ((hets + 2.0) * (hets + 1.0))
        total += probs[hets + 2]
        homr -= 1
        homc -= 1

    # two-sided exact p: total mass of outcomes no more probable than observed
    observed_p = probs[obs_hets]
    p_hwe = 0.0
    for p in probs:
        if p <= observed_p:
            p_hwe += p / total

    return min(p_hwe, 1)
@@ -0,0 +1,111 @@
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ import pandas as pd
4
+ from gwaslab.g_Log import Log
5
+
6
+ #STATE NO. MNEMONIC DESCRIPTION COLOR NAME COLOR CODE
7
+ #1 TssA Active TSS Red 255,0,0
8
+ #2 TssAFlnk Flanking Active TSS Orange Red 255,69,0
9
+ #3 TxFlnk Transcr. at gene 5' and 3' LimeGreen 50,205,50
10
+ #4 Tx Strong transcription Green 0,128,0
11
+ #5 TxWk Weak transcription DarkGreen 0,100,0
12
+ #6 EnhG Genic enhancers GreenYellow 194,225,5
13
+ #7 Enh Enhancers Yellow 255,255,0
14
+ #8 ZNF/Rpts ZNF genes & repeats Medium Aquamarine 102,205,170
15
+ #9 Het Heterochromatin PaleTurquoise 138,145,208
16
+ #10 TssBiv Bivalent/Poised TSS IndianRed 205,92,92
17
+ #11 BivFlnk Flanking Bivalent TSS/Enh DarkSalmon 233,150,122
18
+ #12 EnhBiv Bivalent Enhancer DarkKhaki 189,183,107
19
+ #13 ReprPC Repressed PolyComb Silver 128,128,128
20
+ #14 ReprPCWk Weak Repressed PolyComb Gainsboro 192,192,192
21
+ #15 Quies Quiescent/Low White 255,255,255
22
+
23
# 15-state Roadmap ChromHMM palette; RGB per state (see the table above)
_STATE_RGB = [
    (255, 0, 0),      # 1  TssA
    (255, 69, 0),     # 2  TssAFlnk
    (50, 205, 50),    # 3  TxFlnk
    (0, 128, 0),      # 4  Tx
    (0, 100, 0),      # 5  TxWk
    (194, 225, 5),    # 6  EnhG
    (255, 255, 0),    # 7  Enh
    (102, 205, 170),  # 8  ZNF/Rpts
    (138, 145, 208),  # 9  Het
    (205, 92, 92),    # 10 TssBiv
    (233, 150, 122),  # 11 BivFlnk
    (189, 183, 107),  # 12 EnhBiv
    (128, 128, 128),  # 13 ReprPC
    (192, 192, 192),  # 14 ReprPCWk
    (255, 255, 255),  # 15 Quies
]

# keyed by state label ("E1" .. "E15")
color_dict = {"E{}".format(i + 1): np.array(rgb) for i, rgb in enumerate(_STATE_RGB)}

# keyed by state number (1 .. 15)
color_dict_i = {i + 1: np.array(rgb) for i, rgb in enumerate(_STATE_RGB)}
58
+
59
+
60
def _plot_chromatin_state(region_chromatin_files,
                        region_chromatin_labels,
                        region,
                        fig,
                        ax,
                        xlim_i,
                        log=Log(),
                        verbose=True):
    '''
    Draw one horizontal chromatin-state stripe per input file onto `ax`.

    region_chromatin_files : list of paths to 4-column tab-separated files
        (ID, START, END, STATE), where ID looks like "chr1" and STATE
        carries a leading numeric index (e.g. "7_Enh").
    region_chromatin_labels : stripe labels; used as y tick labels only
        when its length matches region_chromatin_files.
    region : (chr, start, end) restricting which records are drawn.
    fig, ax : matplotlib figure and the target axes.
    xlim_i : x limits of the main regional plot; used to shift genomic
        coordinates so the stripes align with the plot above.
    Returns the figure.
    '''
    target_chr = region[0]
    target_start = region[1]
    target_end = region[2]

    # shift from genomic coordinates into the main plot's axis coordinates
    offset_i = xlim_i[0] - region[1]

    # each stripe occupies 0.1 y-units; limits padded by 0.05 on both sides
    ax.set_ylim([-0.05,0.1*len(region_chromatin_files)-0.05])
    ax.set_xlim([offset_i+target_start,offset_i+target_end])

    # pixel height of one 0.1-unit stripe via the data->display transform
    # NOTE(review): with a non-inverted y-axis this difference is negative;
    # the axis is only inverted at the end — confirm the sign passed to linewidth.
    px_for_01 = ax.transData.transform([0,0])[1] - ax.transData.transform([0,0.1])[1]

    point=fig.dpi/72  # NOTE(review): unused local
    # convert the stripe height from pixels to points for use as linewidth
    points_for_01 = px_for_01*72 / fig.dpi

    # each tissue
    for i,file in enumerate(region_chromatin_files):
        log.write(" -Loading : {}".format(file), verbose=verbose)
        enh = pd.read_csv(file,sep="\t",header=None)
        enh.columns=["ID","START","END","STATE"]
        # "chr7" -> 7; non-numeric chromosomes (e.g. chrX) become <NA> and never match
        enh["CHR"] = enh["ID"].str.extract(r"chr([0-9]+)").astype("float").astype("Int64")
        # "7_Enh" -> 7: numeric state index used for the color lookup below
        enh["STATE_i"] = enh["STATE"].str.extract(r"([0-9]+)_*").astype("float").astype("Int64")
        # keep records on the target chromosome that overlap the region
        enh_in_region = (enh["CHR"] == target_chr) & ((enh["END"] > target_start) & (enh["START"]<target_end))
        # draw higher state numbers first so lower state numbers end up on top
        df =enh.loc[enh_in_region,["STATE_i","START","END"]].sort_values("STATE_i",ascending=False)
        log.write(" -Number of records in specified region: {}".format(len(df)), verbose=verbose)
        # each block
        for index, row in df.iterrows():
            color=color_dict_i[row["STATE_i"]]
            # one horizontal line segment per record, colored by state;
            # RGB scaled from 0-255 to matplotlib's 0-1 range
            ax.plot([offset_i + row["START"] ,offset_i + row["END"]],
                    [i*0.1,i*0.1],
                    c=color/255,linewidth=points_for_01,solid_capstyle="butt")

    ## add stripe label
    if len(region_chromatin_labels) == len(region_chromatin_files):
        ax.set_yticks([i*0.1 for i in range(len(region_chromatin_labels))],
                        region_chromatin_labels)
    else:
        ax.set_yticks(ticks=[])

    ax.set_xticks(ticks=[])
    ax.invert_yaxis()
    return fig
@@ -75,7 +75,10 @@ def compare_effect(path1,
75
75
  if scaled == True:
76
76
  scaled1 = True
77
77
  scaled2 = True
78
-
78
+ if is_q_mc=="fdr" or is_q_mc=="bon":
79
+ is_q = True
80
+ else:
81
+ raise ValueError("Please select either fdr or bon for is_q_mc.")
79
82
  if save_args is None:
80
83
  save_args = {"dpi":300,"facecolor":"white"}
81
84
  if reg_box is None:
@@ -133,6 +133,7 @@ def mqqplot(insumstats,
133
133
  anno_style="right",
134
134
  anno_fixed_arm_length=None,
135
135
  anno_source = "ensembl",
136
+ anno_gtf_path=None,
136
137
  anno_adjust=False,
137
138
  anno_max_iter=100,
138
139
  arm_offset=50,
@@ -816,6 +817,7 @@ def mqqplot(insumstats,
816
817
  log=log,
817
818
  build=build,
818
819
  source=anno_source,
820
+ gtf_path=anno_gtf_path,
819
821
  verbose=verbose).rename(columns={"GENE":"Annotation"})
820
822
  log.write("Finished extracting variants for annotation...",verbose=verbose)
821
823
 
@@ -365,6 +365,10 @@ def _plot_recombination_rate(sumstats,pos, region, ax1, rr_path, rr_chr_dict, r
365
365
 
366
366
  rc = rc.loc[(rc["Position(bp)"]<region[2]) & (rc["Position(bp)"]>region[1]),:]
367
367
  ax4.plot(rc_track_offset+rc["Position(bp)"],rc["Rate(cM/Mb)"],color="#5858FF",zorder=1)
368
+
369
+ ax1.set_zorder(ax4.get_zorder()+1)
370
+ ax1.patch.set_visible(False)
371
+
368
372
  if rr_ylabel:
369
373
  ax4.set_ylabel("Recombination rate(cM/Mb)")
370
374
  if rr_lim!="max":
@@ -22,6 +22,7 @@ from gwaslab.bd_common_data import get_number_to_chr
22
22
  from gwaslab.bd_common_data import get_recombination_rate
23
23
  from gwaslab.bd_common_data import get_gtf
24
24
  from gwaslab.viz_aux_reposition_text import adjust_text_position
25
+ from gwaslab.viz_aux_chromatin import _plot_chromatin_state
25
26
  from gwaslab.viz_aux_quickfix import _quick_fix
26
27
  from gwaslab.viz_aux_quickfix import _get_largenumber
27
28
  from gwaslab.viz_aux_quickfix import _quick_add_tchrpos
@@ -37,15 +38,20 @@ from gwaslab.io_to_pickle import load_data_from_pickle
37
38
  from gwaslab.g_Sumstats import Sumstats
38
39
  from gwaslab.viz_aux_save_figure import save_figure
39
40
  from gwaslab.viz_plot_mqqplot import mqqplot
41
+ import matplotlib.patches as patches
40
42
 
41
43
  def plot_stacked_mqq(objects,
42
44
  vcfs=None,
43
45
  mode="r",
44
46
  mqqratio=3,
45
47
  region=None,
48
+ region_chromatin_height=0.1,
49
+ region_chromatin_files = None,
50
+ region_chromatin_labels= None,
46
51
  titles= None,
47
52
  title_pos=None,
48
53
  title_args=None,
54
+ #title_box = None,
49
55
  gtf=None,
50
56
  gene_track_height=0.5,
51
57
  fig_args=None,
@@ -72,11 +78,11 @@ def plot_stacked_mqq(objects,
72
78
  fig_args = {"dpi":200}
73
79
  if region_lead_grid_line is None:
74
80
  region_lead_grid_line = {"alpha":0.5,"linewidth" : 2,"linestyle":"--","color":"#FF0000"}
75
- if title_pos is None:
76
- title_pos = [0.01,0.97]
77
- if title_args is None:
78
- title_args = {}
79
-
81
+ if region_chromatin_files is None:
82
+ region_chromatin_files = []
83
+ region_chromatin_height = len(region_chromatin_files) * region_chromatin_height
84
+ if region_chromatin_labels is None:
85
+ region_chromatin_labels = []
80
86
  # create figure and axes ##################################################################################################################
81
87
  if mode=="r":
82
88
  if len(vcfs)==1:
@@ -84,9 +90,15 @@ def plot_stacked_mqq(objects,
84
90
  n_plot = len(sumstats_list)
85
91
  n_plot_plus_gene_track = n_plot + 1
86
92
 
93
+ if len(region_chromatin_files)>0 and mode=="r":
94
+ height_ratios = [1 for i in range(n_plot_plus_gene_track-1)]+[region_chromatin_height]+[gene_track_height]
95
+ n_plot_plus_gene_track +=1
96
+ else:
97
+ height_ratios = [1 for i in range(n_plot_plus_gene_track-1)]+[gene_track_height]
98
+
87
99
  fig_args["figsize"] = [16,subplot_height*n_plot_plus_gene_track]
88
100
  fig, axes = plt.subplots(n_plot_plus_gene_track, 1, sharex=True,
89
- gridspec_kw={'height_ratios': [1 for i in range(n_plot_plus_gene_track-1)]+[gene_track_height]},
101
+ gridspec_kw={'height_ratios': height_ratios},
90
102
  **fig_args)
91
103
  plt.subplots_adjust(hspace=region_hspace)
92
104
  elif mode=="m":
@@ -179,18 +191,58 @@ def plot_stacked_mqq(objects,
179
191
  **mqq_args_for_each_plot[index]
180
192
  )
181
193
  lead_variants_is[index] = (lead_i,lead_i2)
182
-
194
+
195
+ if len(region_chromatin_files)>0 and mode=="r":
196
+ xlim_i = axes[-1].get_xlim()
197
+ fig = _plot_chromatin_state( region_chromatin_files = region_chromatin_files,
198
+ region_chromatin_labels = region_chromatin_labels,
199
+ region = region,
200
+ fig = fig,
201
+ ax = axes[-2],
202
+ xlim_i=xlim_i,
203
+ log=log,
204
+ verbose=verbose)
183
205
  # adjust labels
184
206
  # drop labels for each plot
185
207
  # set a common laebl for all plots
186
208
 
187
-
188
- if titles is not None:
209
+ #if title_box is None:
210
+ # title_box = dict(boxstyle='square', facecolor='white', alpha=1.0, edgecolor="black")
211
+ # title_box = {}
212
+
213
+ if title_args is None:
214
+ title_args = {}
215
+ if titles is not None and mode=="r":
216
+ if title_pos is None:
217
+ title_pos = [0.01,0.01]
189
218
  for index,title in enumerate(titles):
190
- axes[index].text(title_pos[0], title_pos[1] , title, transform=axes[index].transAxes,ha="left", va='top',**title_args)
219
+
220
+ current_text = axes[index].text(title_pos[0], title_pos[1] , title, transform=axes[index].transAxes,ha="left", va='bottom',zorder=999999, **title_args)
221
+ r = fig.canvas.get_renderer()
222
+ bb = current_text.get_window_extent(renderer=r).transformed(axes[index].transAxes.inverted())
223
+ width = bb.width
224
+ height = bb.height
225
+
226
+ rect = patches.Rectangle((0.0,0.0),
227
+ height=height + 0.02*2,
228
+ width=width + 0.01*2,
229
+ transform=axes[index].transAxes,
230
+ linewidth=1,
231
+ edgecolor='black',
232
+ facecolor='white',
233
+ alpha=1.0,
234
+ zorder=99998)
235
+ axes[index].add_patch(rect)
236
+ rect.set(zorder=99998)
237
+ else:
238
+ if title_pos is None:
239
+ title_pos = [0.01,0.97]
240
+ for index,title in enumerate(titles):
241
+ axes[index].text(title_pos[0], title_pos[1] , title, transform=axes[index].transAxes,ha="left", va='top',zorder=999999, **title_args)
242
+
191
243
  ##########################################################################################################################################
192
244
  # draw the line for lead variants
193
- _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line)
245
+ _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line,region_chromatin_files)
194
246
 
195
247
  ##########################################################################################################################################
196
248
  _drop_old_y_labels(axes, n_plot)
@@ -208,12 +260,16 @@ def _drop_old_y_labels(axes, n_plot):
208
260
  for index in range(n_plot):
209
261
  axes[index].set_ylabel("")
210
262
 
211
- def _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line):
263
+ def _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line,region_chromatin_files):
264
+ if len(region_chromatin_files)>0:
265
+ n_plot_and_track = n_plot+2
266
+ else:
267
+ n_plot_and_track = n_plot+1
212
268
  if mode=="r":
213
269
  for index, sig_is in lead_variants_is.items():
214
270
  for sig_i in sig_is:
215
271
  if sig_i is not None:
216
- for each_axis_index in range(n_plot + 1):
272
+ for each_axis_index in range(n_plot_and_track):
217
273
  axes[each_axis_index].axvline(x=sig_i, zorder=2,**region_lead_grid_line)
218
274
 
219
275
  def _add_new_y_label(mode, fig, gene_track_height,n_plot,subplot_height ):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gwaslab
3
- Version: 3.4.45
3
+ Version: 3.4.46
4
4
  Summary: A collection of handy tools for GWAS SumStats
5
5
  Author-email: Yunye <yunye@gwaslab.com>
6
6
  Project-URL: Homepage, https://cloufield.github.io/gwaslab/
@@ -13,8 +13,8 @@ Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
14
  License-File: LICENSE_before_v3.4.39
15
15
  Requires-Dist: pandas !=1.5,>=1.3
16
- Requires-Dist: numpy >=1.21.2
17
- Requires-Dist: matplotlib !=3.7.2,>=3.5
16
+ Requires-Dist: numpy <2,>=1.21.2
17
+ Requires-Dist: matplotlib !=3.7.2,<3.9,>=3.5
18
18
  Requires-Dist: seaborn >=0.12
19
19
  Requires-Dist: scipy >=1.12
20
20
  Requires-Dist: pySAM ==0.22.1
@@ -51,7 +51,7 @@ Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/
51
51
  ### install via pip
52
52
 
53
53
  ```
54
- pip install gwaslab==3.4.43
54
+ pip install gwaslab==3.4.45
55
55
  ```
56
56
 
57
57
  ```python
@@ -90,7 +90,7 @@ Create a Python 3.9 environment and install gwaslab using pip:
90
90
  ```
91
91
  conda env create -n gwaslab_test -c conda-forge python=3.9
92
92
  conda activate gwaslab
93
- pip install gwaslab==3.4.43
93
+ pip install gwaslab==3.4.45
94
94
  ```
95
95
 
96
96
  or create a new environment using yml file [environment_3.4.40.yml](https://github.com/Cloufield/gwaslab/blob/main/environment_3.4.40.yml)
@@ -1,22 +1,22 @@
1
- gwaslab/__init__.py,sha256=dFnrh4L620F5JirsSF98SmkuligA-fybIGdBF6r9Ims,2386
2
- gwaslab/bd_common_data.py,sha256=v98X3tdRNOVE2gCiSHkfyBb0pSIjTk5IFG8A725Oj3o,12639
1
+ gwaslab/__init__.py,sha256=7TKJaODdpeuQKibL7gIEa4MtyQ0pmrU-vIHQ-Et27lQ,2433
2
+ gwaslab/bd_common_data.py,sha256=qr6OMbBaTH2Smfu8347SO9NmF410tn8dq8pRGF5-OpY,13751
3
3
  gwaslab/bd_config.py,sha256=TP-r-DPhJD3XnRYZbw9bQHXaDIkiRgK8bG9HCt-UaLc,580
4
4
  gwaslab/bd_download.py,sha256=cDDk2C5IvjeAzvPvVYGTkI4Ss33DUtEDjGo8eAbQRvY,15663
5
5
  gwaslab/bd_get_hapmap3.py,sha256=asNjQYeGfQi8u3jnfenRvDdKMs5ptql5wpcUzqMlwUI,3937
6
6
  gwaslab/cache_manager.py,sha256=HOTnSkCOyGEPLRl90WT8D_6pAdI8d8AzenMIDGuCeWc,28113
7
7
  gwaslab/g_Log.py,sha256=C3Zv-_6c3C9ms8bgQ-ytplz22sjk7euqXYkWr9zNeAs,1573
8
8
  gwaslab/g_Phenotypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- gwaslab/g_Sumstats.py,sha256=NOEQd00guGch_GIt5bHv1wcrAvETfChqzmtgm-nIx_I,35298
9
+ gwaslab/g_Sumstats.py,sha256=TUcFQFyODS_-FYMdXDvrBijG4Qtfi1igIWM-eEgb0nc,35352
10
10
  gwaslab/g_SumstatsPair.py,sha256=20snPb4SlI6ftMGVjgxAuyxsxYRQF-GzzlBSnoB-3Lo,8851
11
11
  gwaslab/g_SumstatsT.py,sha256=u_DighLMnMxwTLnqm-B58pA0G6WXRj6pudPyKMVKjSU,2133
12
12
  gwaslab/g_Sumstats_summary.py,sha256=FECvvFXJVKaCX5dggBvvk9YvJ6AbdbcLfjltysX7wEE,6380
13
13
  gwaslab/g_meta.py,sha256=htWlgURWclm9R6UqFcX1a93WN27xny7lGUeyJZOtszQ,2583
14
14
  gwaslab/g_vchange_status.py,sha256=jLoVzMJFhB5k_cJKzHuBNc2HZGBWydAunCNa0n_d54g,1923
15
- gwaslab/g_version.py,sha256=49_gR8lEQ_jgmfO9XJszEzuzDIESj5dHj6gta3Ilkmw,1818
15
+ gwaslab/g_version.py,sha256=g2bR-qFeFvLADj57VXMT5dufwba2YGD86hfDHRYURfU,1818
16
16
  gwaslab/hm_casting.py,sha256=FqP4EQl83Q2OKLw004OgLIvUH795TVCGwziLk5jsHqY,11368
17
- gwaslab/hm_harmonize_sumstats.py,sha256=ympk2MZkbb0MnZ1n2ajkV36L8EAm7nBEaYhjqjI38tU,78548
17
+ gwaslab/hm_harmonize_sumstats.py,sha256=1hjUdle2DSKHGBp2BktfFqf-QHU_q2xWl_mPhiYc_ZA,78616
18
18
  gwaslab/hm_rsid_to_chrpos.py,sha256=ODWREO0jPN0RAfNzL5fRzSRANfhiksOvUVPuEsFZQqA,6552
19
- gwaslab/io_preformat_input.py,sha256=w62JLAr16Ru0EgUtBCEV2eXRO89OqhidQxwf2IPAM38,20014
19
+ gwaslab/io_preformat_input.py,sha256=AZ43WGqVTzbo3XtClWhjRjsj6pBR9stw6JBL_TZ461U,20673
20
20
  gwaslab/io_read_ldsc.py,sha256=8S9n4imgl4d0WPms_GYld-6uUM5z7iWGiCA-M814kzY,12123
21
21
  gwaslab/io_read_tabular.py,sha256=EG-C6KhCutt4J4LlOMgXnqzJvU-EZXzVhMvaDFnHrMM,2380
22
22
  gwaslab/io_to_formats.py,sha256=QuGWdvnAamaZAuhymj-0SuNBaKz1maTTyH396gvVaO8,29229
@@ -28,7 +28,7 @@ gwaslab/ldsc_parse.py,sha256=MBnfgcWlV4oHp9MoDRh1mpilaHhAR15Af77hMFn4-5k,10564
28
28
  gwaslab/ldsc_regressions.py,sha256=yzbGjgNV7u-SWXNPsh9S8y9mK97Bim_Nmad9G9V18ZU,30078
29
29
  gwaslab/ldsc_sumstats.py,sha256=O0olsDxKlh1MJ1gAuEN1t40rxhajOEwOQ20ak7xoDrI,26245
30
30
  gwaslab/qc_check_datatype.py,sha256=kW68uk4dTLOU2b1dHoVat6n0loundDysAjIqxsXW28Q,3379
31
- gwaslab/qc_fix_sumstats.py,sha256=cpJibJ_77p4cg39R4zRunhOK2deIK4PfQA9wmYZgyqk,92745
31
+ gwaslab/qc_fix_sumstats.py,sha256=-DQz5dPW6YXXVP-LV2Txa4lJrpZHhqAoKNny6IYAW18,93100
32
32
  gwaslab/run_script.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
33
  gwaslab/util_ex_calculate_ldmatrix.py,sha256=LpE__LoYRHLgVKlCHo6lYWlz9LEUVUDqYPEAP-Svbm0,14598
34
34
  gwaslab/util_ex_calculate_prs.py,sha256=5l1eiZs8YwIpEgp7i3IurP8n5KwQM5awbG9fWSm4iT4,9053
@@ -47,23 +47,26 @@ gwaslab/util_in_calculate_power.py,sha256=JfHJFg3tNF0f4NHgWlzVW2mSxCiP07mAHIyEfV
47
47
  gwaslab/util_in_convert_h2.py,sha256=a8Cbudt3xn9WP2bPc-7ysuowB-LYub8j8GeDXl7Lk7Q,6483
48
48
  gwaslab/util_in_correct_winnerscurse.py,sha256=Gp--yAQ8MMzdkWIvXP9C1BHVjZc-YzqHfYWhAj19w9w,2110
49
49
  gwaslab/util_in_fill_data.py,sha256=gdTwYA6FvBMnrtxAeL0lEj_Z0aGIoRNPScWDlJvZWeQ,14021
50
- gwaslab/util_in_filter_value.py,sha256=6yz3omukfqhmkfGZwGtr2BPQ6FcSTj4l6o7EhPMXRz0,22100
50
+ gwaslab/util_in_filter_value.py,sha256=dY4X66N9A4MHCRHjPqLYFufMM91ggLRwUBf_nJYh8Lg,23605
51
51
  gwaslab/util_in_get_density.py,sha256=kpKXH69acMkeYVG5vs-VbJC3COhmuLBfYco-wuOxgjc,3934
52
- gwaslab/util_in_get_sig.py,sha256=atyBJZCWGUSgy-nvIR8_a_isseq1nKhzTaRVG2LbKQk,37762
52
+ gwaslab/util_in_get_sig.py,sha256=9kq1GXacknO2YnVmsTli1GlPA728ASweTZ3UKm3Wszo,38783
53
+ gwaslab/util_in_meta.py,sha256=5K9lIZcIgUy0AERqHy1GvMN2X6dp45JUUgopuDLgt4o,11284
54
+ gwaslab/util_in_snphwe.py,sha256=-KpIDx6vn_nah6H55IkV2OyjXQVXV13XyBL069WE1wM,1751
53
55
  gwaslab/viz_aux_annotate_plot.py,sha256=R-1GT89E4NEBAMNTYzNawdi9rjQV5LCnODgnYOOKsys,32184
56
+ gwaslab/viz_aux_chromatin.py,sha256=NzbFFpbwAMH-39F8z0qJaExw-JcKYcAlzyzbMkvFo5M,4002
54
57
  gwaslab/viz_aux_quickfix.py,sha256=Z6ZNEAUFuWVDTzH-qGreNGxPxJLCmqhXtBrvDOgo4g8,18308
55
58
  gwaslab/viz_aux_reposition_text.py,sha256=iRIP-Rkltlei068HekJcVubiqPrunBqvAoSQ1eHk04M,4304
56
59
  gwaslab/viz_aux_save_figure.py,sha256=nL-aoE8Kg06h7FgleGRBIZjhI-6w5gpn3E1HWMwBig8,2664
57
60
  gwaslab/viz_plot_compare_af.py,sha256=qtXW45-Sq_ugK8ZfqBYMpmf58SKi3lB3YyHnzn_akcE,5344
58
- gwaslab/viz_plot_compare_effect.py,sha256=8om3y6YQfnOk4FfkKSpKr2KqJcsMeCwQ6FRRKbDrm3U,49366
61
+ gwaslab/viz_plot_compare_effect.py,sha256=iA74jMzh-G65U6BeXyQro08tPlJWpNyvtrjFsYHLvFM,49505
59
62
  gwaslab/viz_plot_forestplot.py,sha256=xgOnefh737CgdQxu5naVyRNBX1NQXPFKzf51fbh6afs,6771
60
63
  gwaslab/viz_plot_miamiplot.py,sha256=rCFEp7VNuVqeBBG3WRkmFAtFklbF79BvIQQYiSY70VY,31238
61
64
  gwaslab/viz_plot_miamiplot2.py,sha256=SWv82D8UBbREKsk8EoKth-2w68l6FbXyVLsb_E1hh8o,15882
62
- gwaslab/viz_plot_mqqplot.py,sha256=PzRWnm11whxww7ut-bzFkj1sbPc_c0OP7yRpIgYo2iQ,61739
65
+ gwaslab/viz_plot_mqqplot.py,sha256=oVFiLe6Xv_-ryY8I36tGjU9StjM_ust72YSSfnZgaUg,61828
63
66
  gwaslab/viz_plot_qqplot.py,sha256=psQgVpP29686CEZkzQz0iRbApzqy7aE3GGiBcazVvNw,7247
64
- gwaslab/viz_plot_regionalplot.py,sha256=PBIWkNj2fj-dRLKQJNpM8wor5jya2anqix0-UYLE0Is,37901
67
+ gwaslab/viz_plot_regionalplot.py,sha256=8u-5-yfy-UaXhaxVVz3Y5k2kBAoqzczUw1hyyD450iI,37983
65
68
  gwaslab/viz_plot_rg_heatmap.py,sha256=PidUsgOiEVt6MfBPCF3_yDhOEytZ-I1q-ZD6_0pFrV4,13713
66
- gwaslab/viz_plot_stackedregional.py,sha256=EAHz5SZGengZ_pxcOg62ZRimGRz6_goQlp9MLCwmeZc,11890
69
+ gwaslab/viz_plot_stackedregional.py,sha256=yWxAJyqA3Kv9PUFTLzgbAu_hP7wi3_3bVVQlJECm8Gc,14687
67
70
  gwaslab/viz_plot_trumpetplot.py,sha256=ZHdc6WcVx0-oKoj88yglRkmB4bS9pOiEMcuwKW35Yvo,42672
68
71
  gwaslab/data/formatbook.json,sha256=N2nJs80HH98Rsu9FxaSvIQO9J5yIV97WEtAKjRqYwiY,38207
69
72
  gwaslab/data/reference.json,sha256=k8AvvgDsuLxzv-NCJHWvTUZ5q_DLAFxs1Th3jtL313k,11441
@@ -73,9 +76,9 @@ gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz,sha256=qD9RsC5S2h6l-OdpW
73
76
  gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz,sha256=Y8ZT2FIAhbhlgCJdE9qQVAiwnV_fcsPt72usBa7RSBM,10225828
74
77
  gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz,sha256=R7IkssKu0L4WwkU9SrS84xCMdrkkKL0gnTNO_OKbG0Y,219
75
78
  gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz,sha256=76CIU0pibDJ72Y6UY-TbIKE9gEPwTELAaIbCXyjm80Q,470
76
- gwaslab-3.4.45.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
77
- gwaslab-3.4.45.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
78
- gwaslab-3.4.45.dist-info/METADATA,sha256=5FN5dbVypNPET635Eooi01_1NDFD1dNr1T9Jv0JXmLc,7757
79
- gwaslab-3.4.45.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
80
- gwaslab-3.4.45.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
81
- gwaslab-3.4.45.dist-info/RECORD,,
79
+ gwaslab-3.4.46.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
80
+ gwaslab-3.4.46.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
81
+ gwaslab-3.4.46.dist-info/METADATA,sha256=aw5qahaxh7TAYCLNPdBO1FmHCWQk3mQcOlZohaGqorw,7765
82
+ gwaslab-3.4.46.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
83
+ gwaslab-3.4.46.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
84
+ gwaslab-3.4.46.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (70.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5