gwaslab 3.5.5__py3-none-any.whl → 3.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. See the registry's advisory page for this release for more details.

@@ -1490,10 +1490,100 @@ def infer_af(chr,start,end,ref,alt,vcf_reader,alt_freq,chr_dict=None):
1490
1490
  elif record.ref==alt and (ref in record.alts):
1491
1491
  return 1 - record.info[alt_freq][0]
1492
1492
  return np.nan
1493
+ ##############################################################################################################################################################################################
1493
1494
 
1495
+ ################################################################################################################
1494
1496
 
1497
+ def _paralleleinferafwithmaf(sumstats,ref_infer,ref_alt_freq=None,n_cores=1, chr="CHR",pos="POS",ref="NEA",alt="EA",
1498
+ eaf="EAF",maf="MAF",ref_eaf="_REF_EAF",status="STATUS",chr_dict=None,force=False, verbose=True,log=Log()):
1499
+ ##start function with col checking##########################################################
1500
+ _start_line = "infer sumstats EAF from sumstats MAF using reference VCF ALT frequency"
1501
+ _end_line = "inferring sumstats EAF from sumstats MAF using reference VCF ALT frequency"
1502
+ _start_cols = [chr,pos,ref,alt,status]
1503
+ _start_function = ".infer_af()"
1504
+ _must_args ={"ref_alt_freq":ref_alt_freq}
1495
1505
 
1496
- ################################################################################################################
1506
+ is_enough_info = start_to(sumstats=sumstats,
1507
+ log=log,
1508
+ verbose=verbose,
1509
+ start_line=_start_line,
1510
+ end_line=_end_line,
1511
+ start_cols=_start_cols,
1512
+ start_function=_start_function,
1513
+ n_cores=n_cores,
1514
+ ref_vcf=ref_infer,
1515
+ **_must_args)
1516
+ if is_enough_info == False: return sumstats
1517
+ ############################################################################################
1518
+ chr_dict = auto_check_vcf_chr_dict(ref_infer, chr_dict, verbose, log)
1519
+
1520
+ if eaf not in sumstats.columns:
1521
+ sumstats[eaf]=np.nan
1522
+ if ref_eaf not in sumstats.columns:
1523
+ sumstats[ref_eaf]=np.nan
1524
+
1525
+ prenumber = sum(sumstats[eaf].isna())
1526
+
1527
+ # ref_alt_freq INFO in vcf was provided
1528
+ if ref_alt_freq is not None:
1529
+ log.write(" -Field for alternative allele frequency in VCF INFO: {}".format(ref_alt_freq), verbose=verbose)
1530
+ if not force:
1531
+ good_chrpos = sumstats[status].str.match(r'\w\w\w[0]\w\w\w', case=False, flags=0, na=False)
1532
+ log.write(" -Checking variants:", sum(good_chrpos),verbose=verbose)
1533
+
1534
+ ########################
1535
+ #extract ref af
1536
+ if sum(sumstats[eaf].isna())<10000:
1537
+ n_cores=1
1538
+ #df_split = np.array_split(sumstats.loc[good_chrpos,[chr,pos,ref,alt]], n_cores)
1539
+ df_split = _df_split(sumstats.loc[good_chrpos,[chr,pos,ref,alt]], n_cores)
1540
+ pool = Pool(n_cores)
1541
+ map_func = partial(inferaf,chr=chr,pos=pos,ref=ref,alt=alt,eaf=ref_eaf,ref_infer=ref_infer,ref_alt_freq=ref_alt_freq,chr_dict=chr_dict)
1542
+ sumstats.loc[good_chrpos,[ref_eaf]] = pd.concat(pool.map(map_func,df_split))
1543
+ pool.close()
1544
+ pool.join()
1545
+
1546
+ ###########################
1547
+ # infer sumstats EAF
1548
+ # based on sumstats MAF and reference EAF
1549
+ is_filpped = ((sumstats[ref_eaf]>=0.5)&(sumstats[maf]<=0.5)) |((sumstats[ref_eaf]<0.5)&(sumstats[maf]>0.5))
1550
+ sumstats[eaf] = sumstats[maf]
1551
+ log.write(" -Flipping MAF to obtain EAF for {} variants".format(sum(is_filpped)),verbose=verbose)
1552
+ sumstats.loc[is_filpped,eaf] = 1 - sumstats.loc[is_filpped,maf]
1553
+
1554
+ ###########################
1555
+ afternumber = sum(sumstats[eaf].isna())
1556
+ log.write(" -Inferred EAF for {} variants.".format(prenumber - afternumber),verbose=verbose)
1557
+ log.write(" -EAF is still missing for {} variants.".format(afternumber),verbose=verbose)
1558
+ sumstats = sumstats.drop(columns=[ref_eaf])
1559
+
1560
+ finished(log,verbose,_end_line)
1561
+ return sumstats
1562
+
1563
+ def inferaf(sumstats,ref_infer,ref_alt_freq=None,chr="CHR",pos="POS",ref="NEA",alt="EA",eaf="EAF",chr_dict=None):
1564
+ #vcf_reader = vcf.Reader(open(ref_infer, 'rb'))
1565
+ vcf_reader = VariantFile(ref_infer)
1566
+ def afapply(x,vcf,alt_freq,chr_dict):
1567
+ return infer_af(x.iloc[0],x.iloc[1]-1,x.iloc[1],x.iloc[2],x.iloc[3],vcf_reader,ref_alt_freq,chr_dict)
1568
+ map_func = partial(afapply,vcf=vcf_reader,alt_freq=ref_alt_freq,chr_dict=chr_dict)
1569
+ status_inferred = sumstats.apply(map_func,axis=1)
1570
+ sumstats[eaf] = status_inferred.values
1571
+ sumstats[eaf]=sumstats[eaf].astype("float")
1572
+ return sumstats
1573
+
1574
+ def infer_af(chr,start,end,ref,alt,vcf_reader,alt_freq,chr_dict=None):
1575
+ if chr_dict is not None: chr=chr_dict[chr]
1576
+ chr_seq = vcf_reader.fetch(chr,start,end)
1577
+
1578
+ for record in chr_seq:
1579
+ if record.pos==end:
1580
+ if record.ref==ref and (alt in record.alts):
1581
+ return record.info[alt_freq][0]
1582
+ elif record.ref==alt and (ref in record.alts):
1583
+ return 1 - record.info[alt_freq][0]
1584
+ return np.nan
1585
+
1586
+ ##############################################################################################################################################################################################
1497
1587
  def auto_check_vcf_chr_dict(vcf_path, vcf_chr_dict, verbose, log):
1498
1588
  if vcf_path is not None:
1499
1589
  if vcf_chr_dict is None:
@@ -1178,7 +1178,7 @@ def sanitycheckstats(sumstats,
1178
1178
  t=(-99999,99999),
1179
1179
  f=(0,float("Inf")),
1180
1180
  p=(0,1),
1181
- mlog10p=(0,9999),
1181
+ mlog10p=(0,99999),
1182
1182
  beta=(-100,100),
1183
1183
  se=(0,float("Inf")),
1184
1184
  OR=(-100,100),
@@ -37,7 +37,7 @@ from gwaslab.hm_harmonize_sumstats import auto_check_vcf_chr_dict
37
37
  #check if in outcome and exposure snp list
38
38
  #replace
39
39
 
40
- def _extract_with_ld_proxy( snplist=None,
40
+ def _extract_with_ld_proxy( snplist=None,
41
41
  common_sumstats=None,
42
42
  sumstats1=None,
43
43
  vcf_path=None,
@@ -58,6 +58,7 @@ def _extract_with_ld_proxy( snplist=None,
58
58
  is_needed=[]
59
59
  no_need =[]
60
60
 
61
+ print(common_sumstats.head())
61
62
  for i in snplist:
62
63
  if i in common_sumstats["SNPID"].values:
63
64
  no_need.append(i)
@@ -72,7 +73,7 @@ def _extract_with_ld_proxy( snplist=None,
72
73
  if len(in_sumstats)==0:
73
74
  log.write(" -No available variants for LD proxy checking...Skipping... ", verbose=verbose)
74
75
  else:
75
- log.write(" -{}} available variants for LD proxy checking... ".format(len(in_sumstats)), verbose=verbose)
76
+ log.write(" -{} available variants for LD proxy checking... ".format(len(in_sumstats)), verbose=verbose)
76
77
 
77
78
  for index,row in in_sumstats.iterrows():
78
79
  # determine SNP and select region
@@ -93,6 +94,16 @@ def _extract_with_ld_proxy( snplist=None,
93
94
  if len(flanking_sumstats)==0:
94
95
  log.write(" -No availble variants in the region...Skipping!", verbose=verbose)
95
96
  continue
97
+
98
+ _get_rsq_single(in_sumstats.loc[index,["POS","NEA_1","EA_1"]],
99
+ row_pos=row["POS"],
100
+ vcf_path=vcf_path,
101
+ region=region,
102
+ log=log,
103
+ verbose=verbose,
104
+ vcf_chr_dict=vcf_chr_dict,
105
+ tabix=tabix)
106
+
96
107
 
97
108
  flanking_sumstats = _get_rsq(row =in_sumstats.loc[index,["POS","NEA_1","EA_1"]],
98
109
  sumstats = flanking_sumstats,
@@ -126,6 +137,81 @@ def _extract_with_ld_proxy( snplist=None,
126
137
  return extracted_sumstats
127
138
 
128
139
 
140
+ def _extract_ld_proxy( snplist=None,
141
+ common_sumstats=None,
142
+ vcf_path=None,
143
+ vcf_chr_dict=None,
144
+ tabix=None,
145
+ log=Log(),
146
+ verbose=True,
147
+ windowsizekb=100,
148
+ ld_threshold=0.8
149
+ ):
150
+ ### Load vcf#######################################################################################
151
+ log.write("Start to load reference genotype...", verbose=verbose)
152
+ log.write(" -reference vcf path : "+ vcf_path, verbose=verbose)
153
+ if tabix is None:
154
+ tabix = which("tabix")
155
+ vcf_chr_dict = auto_check_vcf_chr_dict(vcf_path=vcf_path, vcf_chr_dict=vcf_chr_dict, verbose=verbose, log=log)
156
+
157
+ ld_proxies = pd.DataFrame()
158
+ in_sumstats = common_sumstats.loc[common_sumstats["SNPID"].isin(snplist),:]
159
+
160
+ if len(in_sumstats)==0:
161
+ log.write(" -No available variants for LD proxy checking...Skipping... ", verbose=verbose)
162
+ else:
163
+ log.write(" -{} available variants for LD proxy checking... ".format(len(in_sumstats)), verbose=verbose)
164
+
165
+ for index,row in in_sumstats.iterrows():
166
+ # determine SNP and select region
167
+ snpid = row["SNPID"]
168
+ chrom= int(row["CHR"])
169
+ start= int(row["POS"]-windowsizekb*1000)
170
+ end= int(row["POS"]+windowsizekb*1000)
171
+
172
+ region = (chrom, start, end)
173
+
174
+ ### #######################################################################################
175
+ #is_flanking = common_sumstats["CHR"] == chrom & common_sumstats["CHR"]>start & common_sumstats["CHR"]<end
176
+ #flanking_sumstats = common_sumstats.loc[is_flanking,:]
177
+ flanking_sumstats = common_sumstats.query('CHR == @chrom and @start < POS < @end',engine='python').copy()
178
+
179
+ log.write(" -Extract {} variants in flanking region of {} for checking: {}:{}-{}".format(len(flanking_sumstats), snpid, chrom, start, end), verbose=verbose)
180
+
181
+ if len(flanking_sumstats)==0:
182
+ log.write(" -No availble variants in the region...Skipping!", verbose=verbose)
183
+ continue
184
+
185
+ flanking_sumstats = _get_rsq(row =in_sumstats.loc[index,["POS","NEA","EA"]],
186
+ sumstats = flanking_sumstats,
187
+ row_pos=row["POS"],
188
+ vcf_path=vcf_path,
189
+ region=region,
190
+ log=log,
191
+ verbose=verbose,
192
+ vcf_chr_dict=vcf_chr_dict,
193
+ tabix=tabix)
194
+ if flanking_sumstats is None:
195
+ log.write(" -{} is not found in the vcf...Skipping!".format(snpid))
196
+ continue
197
+ flanking_sumstats = flanking_sumstats.loc[flanking_sumstats["RSQ"]>ld_threshold,:]
198
+
199
+ log.write(" -Variants in LD with {} (RSQ > {}): {}".format(snpid, ld_threshold,len(flanking_sumstats)), verbose=verbose)
200
+
201
+ if len(flanking_sumstats)>0:
202
+ flanking_sumstats["LD_REF_VARIANT"]= snpid
203
+ for i,row_with_rsq in flanking_sumstats.iterrows():
204
+ if row_with_rsq["SNPID"] in common_sumstats["SNPID"].values:
205
+ log.write(" -Top Proxy for {} is found: {} (LD RSQ= {})".format(snpid, row_with_rsq["SNPID"], row_with_rsq["RSQ"]))
206
+ break
207
+ #row_with_rsq = pd.DataFrame(row_with_rsq)
208
+ ld_proxies = pd.concat([ld_proxies, flanking_sumstats], ignore_index=True)
209
+
210
+
211
+ log.write("Finished loading reference genotype successfully!", verbose=verbose)
212
+ return ld_proxies.sort_values(by="RSQ",ascending=False)
213
+
214
+
129
215
  def _get_rsq( row,
130
216
  sumstats,
131
217
  row_pos,
@@ -205,4 +291,77 @@ def _get_rsq( row,
205
291
  return sumstats
206
292
 
207
293
  def _check_if_in_sumstats2(row, sumstast):
208
- pass
294
+ pass
295
+
296
+ def _get_rsq_single( row,
297
+ row_pos,
298
+ vcf_path,
299
+ region,
300
+ log,
301
+ verbose,
302
+ vcf_chr_dict,
303
+ tabix):
304
+ #load genotype data of the targeted region
305
+ ref_genotype = read_vcf(vcf_path,region=vcf_chr_dict[region[0]]+":"+str(region[1])+"-"+str(region[2]),tabix=tabix)
306
+
307
+ if ref_genotype is None:
308
+ log.warning("No data was retrieved. Skipping ...", verbose=verbose)
309
+ ref_genotype=dict()
310
+ ref_genotype["variants/POS"]=np.array([],dtype="int64")
311
+ return None
312
+
313
+ log.write(" -Retrieving index...", verbose=verbose)
314
+ log.write(" -Ref variants in the region: {}".format(len(ref_genotype["variants/POS"])), verbose=verbose)
315
+ # match sumstats pos and ref pos:
316
+ # get ref index for its first appearance of sumstats pos
317
+ #######################################################################################
318
+ def match_varaint(x):
319
+ # x: "POS,NEA,EA"
320
+ if np.any(ref_genotype["variants/POS"] == x.iloc[0]):
321
+ if len(np.where(ref_genotype["variants/POS"] == x.iloc[0] )[0])>1:
322
+ # multiple position matches
323
+ for j in np.where(ref_genotype["variants/POS"] == x.iloc[0])[0]:
324
+ # for each possible match, compare ref and alt
325
+ if x.iloc[1] == ref_genotype["variants/REF"][j]:
326
+ if x.iloc[2] in ref_genotype["variants/ALT"][j]:
327
+ return j
328
+ elif x.iloc[1] in ref_genotype["variants/ALT"][j]:
329
+ if x.iloc[2] == ref_genotype["variants/REF"][j]:
330
+ return j
331
+ else:
332
+ return None
333
+ else:
334
+ # single match
335
+ return np.where(ref_genotype["variants/POS"] == x.iloc[0] )[0][0]
336
+ else:
337
+ # no position match
338
+ return None
339
+
340
+ #############################################################################################
341
+ lead_pos = row_pos
342
+
343
+ # if lead pos is available:
344
+ if lead_pos in ref_genotype["variants/POS"]:
345
+
346
+ # get ref index for lead snp
347
+ lead_snp_ref_index = match_varaint(row)
348
+ #lead_snp_ref_index = np.where(ref_genotype["variants/POS"] == lead_pos)[0][0]
349
+
350
+ # non-na other snp index
351
+ other_snps_ref_index = list(range(len(ref_genotype["calldata/GT"])))
352
+ other_snps_ref_index.remove(lead_snp_ref_index)
353
+
354
+ # get genotype
355
+ lead_snp_genotype = GenotypeArray([ref_genotype["calldata/GT"][lead_snp_ref_index]]).to_n_alt()
356
+ other_snp_genotype = GenotypeArray(ref_genotype["calldata/GT"][other_snps_ref_index]).to_n_alt()
357
+
358
+ log.write(" -Calculating Rsq...", verbose=verbose)
359
+
360
+ if len(other_snp_genotype)>1:
361
+ valid_r2= np.power(rogers_huff_r_between(lead_snp_genotype,other_snp_genotype)[0],2)
362
+ else:
363
+ valid_r2= np.power(rogers_huff_r_between(lead_snp_genotype,other_snp_genotype),2)
364
+
365
+ ld_proxy = pd.DataFrame( {"SNPID":ref_genotype["variants/ID"][other_snps_ref_index],"RSQ":valid_r2 })
366
+
367
+ return ld_proxy.sort_values(by="RSQ",ascending=False)
@@ -40,7 +40,7 @@ def filldata(
40
40
  for i in skip_cols:
41
41
  to_fill.remove(i)
42
42
  log.write(" -Skipping columns: ",skip_cols, verbose=verbose)
43
- if len(set(to_fill) & set(["OR","OR_95L","OR_95U","BETA","SE","P","Z","CHISQ","MLOG10P","MAF"]))==0:
43
+ if len(set(to_fill) & set(["OR","OR_95L","OR_95U","BETA","SE","P","Z","CHISQ","MLOG10P","MAF","SIG"]))==0:
44
44
  log.write(" -No available columns to fill. Skipping.", verbose=verbose)
45
45
  log.write("Finished filling data using existing columns.", verbose=verbose)
46
46
  return sumstats
@@ -219,6 +219,20 @@ def fill_maf(sumstats,log,verbose=True,filled_count=0):
219
219
  return 0,filled_count
220
220
  return 1,filled_count
221
221
 
222
+ def fill_sig(sumstats,log,sig_level=5e-8, verbose=True,filled_count=0):
223
+ if "P" in sumstats.columns or "MLOG10P" in sumstats.columns:
224
+ log.write(" - Determining significant using P and MLOG10P with threshold:{}".format(sig_level), verbose=verbose)
225
+ if "P" in sumstats.columns:
226
+ is_sig = sumstats["P"]<sig_level
227
+ elif "MLOG10P" in sumstats.columns:
228
+ is_sig = sumstats["MLOG10P"]>np.log10(sig_level)
229
+ sumstats["SIGNIFICANT"] = False
230
+ sumstats.loc[is_sig, "SIGNIFICANT"] = True
231
+ filled_count +=1
232
+ else:
233
+ return 0,filled_count
234
+ return 1,filled_count
235
+
222
236
  ####################################################################################################################
223
237
  def fill_extreme_mlog10(sumstats, z):
224
238
  log_pvalue = np.log(2) + ss.norm.logsf(np.abs(sumstats[z])) #two-sided
@@ -289,7 +303,10 @@ def fill_iteratively(sumstats,raw_to_fill,log,only_sig,df,extreme,verbose,sig_le
289
303
  else:
290
304
  status,filled_count = fill_mlog10p(sumstats,log,verbose=verbose)
291
305
  if status == 1 : to_fill.remove("MLOG10P")
292
-
306
+
307
+ if "SIG" in to_fill:
308
+ status,filled_count = fill_sig(sumstats,sig_level=sig_level ,log=log,verbose=verbose,filled_count=filled_count)
309
+ if status == 1 : to_fill.remove("SIG")
293
310
  if filled_count == 0:
294
311
  break
295
312
 
@@ -527,4 +527,55 @@ def _filter_region(sumstats, region, chrom="CHR",pos="POS",log=Log(),verbose=Tru
527
527
 
528
528
  log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)),verbose=verbose)
529
529
  sumstats = sumstats.loc[in_region_snp,:]
530
- return sumstats.copy()
530
+ return sumstats.copy()
531
+
532
+ def _search_variants( sumstats, snplist=None,
533
+ snpid="SNPID" ,rsid="rsID",
534
+ chrom="CHR",pos="POS",ea="EA",nea="NEA",
535
+ log=Log(),verbose=True):
536
+ log.write("Start to search for variants...", verbose=verbose)
537
+ # create a boolean col with FALSE
538
+ if snpid in sumstats.columns:
539
+ is_extract = sumstats[snpid]!=sumstats[snpid]
540
+ else:
541
+ is_extract = sumstats[rsid]!=sumstats[rsid]
542
+
543
+ # search each variant
544
+ for variant in snplist:
545
+
546
+ if pd.api.types.is_list_like(variant):
547
+ # (1:1234)
548
+ single_chrom=variant[0]
549
+ single_pos=variant[1]
550
+ is_extract = is_extract | ((sumstats[pos] == single_pos ) &(sumstats[chrom] == single_chrom))
551
+
552
+ elif pd.api.types.is_string_dtype(type(variant)):
553
+ # rs123
554
+ if "rsID" in sumstats.columns:
555
+ is_extract = is_extract | (sumstats["rsID"] == variant)
556
+
557
+ # 1:123:A:D
558
+ if "SNPID" in sumstats.columns:
559
+ is_extract = is_extract | (sumstats["SNPID"] == variant)
560
+
561
+ # 1:123:A:D -> (1:1234)
562
+ a= re.match(r'^(chr|Chr|CHR)?(\d+)[:_-](\d+)([:_-]([ATCG]+)[:_-]([ATCG]+))?$', variant, flags=0)
563
+
564
+ if a is not None:
565
+ if a[4] is None:
566
+ single_chrom=int(a[2])
567
+ single_pos=int(a[3])
568
+ is_extract = is_extract | ((sumstats[pos] == single_pos ) &(sumstats[chrom] == single_chrom))
569
+ else:
570
+ single_chrom = int(a[2])
571
+ single_pos = int(a[3])
572
+ single_ea = a[5]
573
+ single_nea = a[6]
574
+ a_match = ((sumstats[nea] == single_nea) & (sumstats[ea] == single_ea)) | ((sumstats[nea] == single_ea) & (sumstats[ea] == single_nea))
575
+ is_extract = is_extract | ((sumstats[pos] == single_pos ) &(sumstats[chrom] == single_chrom) & a_match)
576
+
577
+ to_search = sumstats.loc[is_extract,:].copy()
578
+ log.write(" -Found {} variants...".format(len(to_search)),verbose=verbose)
579
+
580
+ log.write("Finished searching variants.", verbose=verbose)
581
+ return to_search
@@ -0,0 +1,51 @@
1
+ import pandas as pd
2
+ from gwaslab.g_Log import Log
3
+ import re
4
+
5
+ def _extract_variant(variant_set, sumstats_dic, log=Log(), verbose=True):
6
+
7
+ combined = pd.DataFrame()
8
+ log.write("Start to initialize gl.SumstatsSet...", verbose=verbose)
9
+ for key, sumstats_gls in sumstats_dic.items():
10
+ log.write(" -{} : {}".format(key, sumstats_gls), verbose=verbose)
11
+
12
+ for key, sumstats_gls in sumstats_dic.items():
13
+
14
+ sumstats_single = sumstats_gls.data
15
+
16
+ # create a boolean col with FALSE
17
+ is_extract = sumstats_single["SNPID"]!=sumstats_single["SNPID"]
18
+
19
+ for variant in variant_set:
20
+
21
+ if pd.api.types.is_list_like(variant):
22
+
23
+ chrom=variant[0]
24
+ pos=variant[1]
25
+
26
+ is_extract = is_extract | ((sumstats_single["POS"] == pos ) &(sumstats_single["CHR"] == chrom))
27
+ elif pd.api.types.is_string_dtype(type(variant)):
28
+
29
+ is_extract = is_extract | (sumstats_single["SNPID"] == variant)
30
+
31
+ a= re.search(r'^(chr|Chr|CHR)?(\d+)[:_-](\d+)[:_-][ATCG]+[:_-][ATCG]+$', variant, flags=0)
32
+ if a is not None:
33
+ chrom=int(a[2])
34
+ pos=int(a[3])
35
+ is_extract = is_extract | ((sumstats_single["POS"] == pos ) &(sumstats_single["CHR"] == chrom))
36
+
37
+ to_extract = sumstats_single.loc[is_extract,:].copy()
38
+ log.write(" -Extracted {} variants from {}".format(len(to_extract), key),verbose=verbose)
39
+ to_extract["STUDY"] = key
40
+
41
+ to_extract_cols=["STUDY"]
42
+
43
+ default_cols=["SNPID","EA","NEA","CHR","POS","BETA","SE","P","MLOG10P","EAF","MAF","STATUS"]
44
+
45
+ for i in default_cols:
46
+ if i in sumstats_single.columns:
47
+ to_extract_cols.append(i)
48
+
49
+ combined = pd.concat([combined, to_extract[to_extract_cols]], ignore_index=True)
50
+ log.write("Finished initializing gl.SumstatsSet.", verbose=verbose)
51
+ return combined
@@ -52,7 +52,8 @@ def get_default_path(keyword,fmt="png"):
52
52
  "esc":"effect_size_comparision",
53
53
  "afc":"allele_frequency_comparision",
54
54
  "gwheatmap":"genome_wide_heatmap",
55
- "scatter":"scatter"
55
+ "scatter":"scatter",
56
+ "forest":"forest"
56
57
  }
57
58
  prefix = path_dictionary[keyword]
58
59
  count = 1