gwaslab 3.4.47__py3-none-any.whl → 3.4.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

gwaslab/util_ex_ldsc.py CHANGED
@@ -304,9 +304,16 @@ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, munge=False, munge_args=
      log.write(" -LDSC log:", verbose=verbose)
      summary = estimate_h2(sumstats, default_args, log)
 
+     results_table = None
+     if type(summary) is tuple:
+         results_table = summary[1]
+         summary = summary[0]
+         log.write(" -Coefficient results have been stored in .ldsc_h2_results", verbose=verbose)
+
+
      log.write(" -Results have been stored in .ldsc_h2", verbose=verbose)
      finished(log=log,verbose=verbose,end_line=_end_line)
-     return parse_ldsc_summary(summary)
+     return parse_ldsc_summary(summary), results_table
 
 
  ####################################################################################################################
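The hunk above changes _estimate_h2_by_ldsc to return a pair (parsed summary, coefficient table) whenever the underlying estimate_h2 call yields a tuple. A minimal caller-side sketch of the new contract; the function run_h2 and the object name sumstats_obj are illustrative assumptions, while the .ldsc_h2 and .ldsc_h2_results attribute names come from the log messages in the diff:

# Hypothetical caller-side sketch (names other than _estimate_h2_by_ldsc are invented):
def run_h2(sumstats_obj, insumstats, log, verbose=True):
    # The helper now returns a 2-tuple, so code that previously stored a single
    # parsed summary needs to unpack two values.
    h2_summary, h2_results = _estimate_h2_by_ldsc(insumstats, log, verbose=verbose)
    sumstats_obj.ldsc_h2 = h2_summary           # parsed LDSC h2 summary, as before
    sumstats_obj.ldsc_h2_results = h2_results   # coefficient table, or None if absent
    return sumstats_obj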
@@ -11,7 +11,7 @@ from gwaslab.g_version import _checking_plink_version
  def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
             p="P",mlog10p="MLOG10P", overwrite=False, study=None, bfile=None,
             n_cores=1, memory=None, chrom=None, clump_p1=5e-8, clump_p2=5e-8, clump_r2=0.01, clump_kb=250,
-            log=Log(),verbose=True):
+            log=Log(),verbose=True,plink="plink",plink2="plink2"):
      ##start function with col checking##########################################################
      _start_line = "perfrom clumping"
      _end_line = "clumping"
@@ -111,7 +111,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
      bfile_to_use = bfile
 
      log.write(" -Performing clumping for CHR {}...".format(i),verbose=verbose)
-     log = _checking_plink_version(v=2, log=log)
+     log = _checking_plink_version(plink2=plink2, log=log)
      if memory is not None:
          memory_flag = "--memory {}".format(memory)
 
@@ -123,7 +123,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
      if scaled == True:
          # clumping using LOG10P
          script = """
- plink2 \
+ {} \
  {}\
  --chr {} \
  --clump {} \
@@ -136,11 +136,11 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
  --clump-kb {} \
  --threads {} {}\
  --out {}
- """.format(file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
+ """.format(plink2, file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
      else:
          # clumping using P
          script = """
- plink2 \
+ {} \
  {}\
  --chr {} \
  --clump {} \
@@ -152,7 +152,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
  --clump-kb {} \
  --threads {} {}\
  --out {}
- """.format(file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
+ """.format(plink2,file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
 
      try:
          output = subprocess.check_output(script, stderr=subprocess.STDOUT, shell=True,text=True)
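The clumping change threads a user-supplied PLINK2 executable into the shell templates, so the executable is now the first positional argument passed to .format(). A hedged sketch of the idea; the path, file names, and flags below are invented for illustration:

# Sketch: the command template now takes the executable as its first placeholder.
plink2 = "/opt/plink2/plink2"   # assumed custom path; the default stays "plink2"
script = """
{} \
--bfile {} \
--clump {} \
--out {}
""".format(plink2, "ref_panel_chr1", "sumstats_for_clumping.tsv", "clumping_chr1")
# The assembled string is then run with subprocess, as in the diff:
# subprocess.check_output(script, stderr=subprocess.STDOUT, shell=True, text=True)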
@@ -184,7 +184,8 @@ def fill_mlog10p(sumstats,log,verbose=True,filled_count=0):
      else:
          return 0,filled_count
      return 1,filled_count
- def fill_extreme_mlog10p(sumstats,log,verbose=True,filled_count=0):
+
+ def fill_extreme_mlog10p(sumstats,df,log,verbose=True,filled_count=0):
      # ref: https://stackoverflow.com/questions/46416027/how-to-compute-p-values-from-z-scores-in-r-when-the-z-score-is-large-pvalue-muc/46416222#46416222
      if "Z" in sumstats.columns:
          # P -> MLOG10P
@@ -198,6 +199,10 @@ def fill_extreme_mlog10p(sumstats,log,verbose=True,filled_count=0):
          log.write(" - Filling MLOG10P using Z column...", verbose=verbose)
          sumstats = fill_extreme_mlog10(sumstats, "Z")
          filled_count +=1
+     elif "CHISQ" in sumstats.columns and "DOF" in sumstats.columns:
+         log.write(" - Filling MLOG10P using CHISQ and DOF column...", verbose=verbose)
+         sumstats = fill_extreme_mlog10_chisq(sumstats, "CHISQ", df)
+         filled_count +=1
      else:
          return 0,filled_count
      return 1,filled_count
@@ -223,6 +228,19 @@ def fill_extreme_mlog10(sumstats, z):
      sumstats["P_EXPONENT"]= exponent
      return sumstats
 
+ def fill_extreme_mlog10_chisq(sumstats, chisq, df):
+     #https://stackoverflow.com/a/46416222/199475
+     log_pvalue = ss.chi2.logsf(sumstats[chisq], sumstats[df])
+
+     log10_pvalue = log_pvalue/np.log(10)
+
+     mantissa = 10**(log10_pvalue %1)
+     exponent = log10_pvalue // 1
+     sumstats["MLOG10P"] = -log10_pvalue
+     sumstats["P_MANTISSA"]= mantissa
+     sumstats["P_EXPONENT"]= exponent
+     return sumstats
+
  ####################################################################################################################
  def fill_iteratively(sumstats,raw_to_fill,log,only_sig,df,extreme,verbose,sig_level):
      to_fill = raw_to_fill.copy()
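The new fill_extreme_mlog10_chisq helper works on the log scale so extremely small p-values never underflow to zero. A self-contained sketch of the same computation; the chi-square values are made up, and ss / np mirror the scipy.stats and numpy aliases the module is assumed to use:

import numpy as np
import scipy.stats as ss

chisq = np.array([10.0, 3000.0])     # illustrative chi-square statistics
dof = np.array([1, 1])               # illustrative degrees of freedom

log_p = ss.chi2.logsf(chisq, dof)    # natural-log survival function, stays finite
log10_p = log_p / np.log(10)         # convert to log10
mlog10p = -log10_p                   # -log10(P); about 653 for chisq=3000, dof=1
mantissa = 10 ** (log10_p % 1)       # leading digits of P
exponent = log10_p // 1              # power of ten of P
# P is approximately mantissa * 10**exponent, even far below float precision.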
@@ -260,7 +278,7 @@ def fill_iteratively(sumstats,raw_to_fill,log,only_sig,df,extreme,verbose,sig_le
      # p to -log10(P) ###############################################################################################
      if "MLOG10P" in to_fill:
          if extreme==True:
-             status,filled_count = fill_extreme_mlog10p(sumstats,log,verbose=verbose,filled_count=filled_count)
+             status,filled_count = fill_extreme_mlog10p(sumstats,df, log,verbose=verbose,filled_count=filled_count)
              filled_count +=1
          elif "P" not in sumstats.columns:
              fill_p(sumstats,log,verbose=verbose)
@@ -38,6 +38,7 @@ def annotate_single(
          region,
          region_anno_bbox_args,
          skip,
+         anno_height=1,
          amode="int",
          snpid="SNPID",
          chrom="CHR",
@@ -131,7 +132,7 @@ def annotate_single(
 
      #xy=(row["i"],row["scaled_P"]+0.2)
      xy=(row["i"],row["scaled_P"]+0.01*maxy)
-     xytext=(last_pos,1.15*maxy*arm_scale)
+     xytext=(last_pos,1.15*maxy*arm_scale*anno_height)
 
      if anno_fixed_arm_length is not None:
          armB_length_in_point = anno_fixed_arm_length
@@ -286,8 +286,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
      log.write(" -Converting data above cut line...",verbose=verbose)
      if ylabels is not None:
          ylabels = pd.Series(ylabels)
-     maxy = series.max()
      series = series.copy()
+
+     maxy = series.max()
      if "b" not in mode:
          log.write(" -Maximum -log10(P) value is "+str(maxy) +" .", verbose=verbose)
      elif "b" in mode:
@@ -77,8 +77,10 @@ def compare_effect(path1,
          scaled2 = True
      if is_q_mc=="fdr" or is_q_mc=="bon":
          is_q = True
-     else:
-         raise ValueError("Please select either fdr or bon for is_q_mc.")
+
+     if is_q == True:
+         if is_q_mc not in [False,"fdr","bon","non"]:
+             raise ValueError("Please select either fdr or bon or non for is_q_mc.")
      if save_args is None:
          save_args = {"dpi":300,"facecolor":"white"}
      if reg_box is None:
@@ -247,7 +247,10 @@ def plot_miami2(
          plt.subplots_adjust(hspace=region_hspace)
      else:
          fig, ax1, ax5 = figax
-
+
+     #if same_ylim==True:
+     #    maxy = merged_sumstats[["scaled_P_1","scaled_P_2"]].max().max()
+
      log.write("Start to create Manhattan plot for sumstats1...", verbose=verbose)
      fig,log = mqqplot(merged_sumstats,
                        chrom="CHR",
@@ -284,16 +287,14 @@ def plot_miami2(
                        _if_quick_qc=False,
                        **mqq_args2)
      log.write("Finished creating Manhattan plot for sumstats2".format(_get_version()), verbose=verbose)
+
 
-     if same_ylim==True:
-         ylim1_converted = ax1.get_ylim()
-         ylim2_converted = ax5.get_ylim()
-         if ylim1_converted > ylim2_converted:
-             ax5.set_ylim(ylim1_converted)
-         else:
-             ax1.set_ylim(ylim2_converted)
      #####################################################################################################################
-
+     ax1l, ax1r = ax5.get_xlim()
+     ax5l, ax5r = ax1.get_xlim()
+     ax1.set_xlim([min(ax1l,ax5l), max(ax1r,ax5r)])
+     ax5.set_xlim([min(ax1l,ax5l), max(ax1r,ax5r)])
+     #####################################################################################################################
      ax5.set_xlabel("")
      #ax5.set_xticks(chrom_df)
      ax5.set_xticklabels([])
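The removed same_ylim block compared y-limit tuples directly; the diff instead aligns the x-axes so the two stacked Miami panels share one coordinate range. A hedged matplotlib sketch of the same pattern; the axes and data below are invented for illustration:

import matplotlib.pyplot as plt

fig, (ax1, ax5) = plt.subplots(2, 1)   # stand-ins for the two Miami plot panels
ax1.plot([0, 90], [1, 5])
ax5.plot([10, 120], [2, 8])

# Same idea as the hunk above: take the union of both x-ranges and apply it to both axes.
ax1l, ax1r = ax5.get_xlim()
ax5l, ax5r = ax1.get_xlim()
shared = [min(ax1l, ax5l), max(ax1r, ax5r)]
ax1.set_xlim(shared)
ax5.set_xlim(shared)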
@@ -141,6 +141,7 @@ def mqqplot(insumstats,
          anno_max_iter=100,
          arm_offset=50,
          arm_scale=1,
+         anno_height=1,
          arm_scale_d=None,
          cut=0,
          skip=0,
@@ -180,6 +181,7 @@ def mqqplot(insumstats,
          xpad=None,
          xpadl=None,
          xpadr=None,
+         xtight=False,
          chrpad=0.03,
          drop_chr_start=False,
          title =None,
@@ -552,7 +554,8 @@ def mqqplot(insumstats,
              cut_log = cut_log,
              verbose =verbose,
              lines_to_plot=lines_to_plot,
-             log = log)
+             log = log
+             )
      except:
          log.warning("No valid data! Please check the input.")
          return None
@@ -596,19 +599,23 @@ def mqqplot(insumstats,
      sumstats.loc[sumstats["scaled_P"]>-np.log10(sig_level_plot),"s"]=4
      sumstats["chr_hue"]=sumstats[chrom].astype("string")
 
-     if vcf_path is not None:
+     if "r" in mode:
+         if vcf_path is None:
+             sumstats["LD"]=100
+             sumstats["SHAPE"]=1
          sumstats["chr_hue"]=sumstats["LD"]
+
      ## default seetings
 
      palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
-
 
      legend = None
      style=None
      linewidth=0
      edgecolor="black"
      # if regional plot assign colors
-     if vcf_path is not None:
+     if "r" in mode:
+     #if vcf_path is not None:
          legend=None
          linewidth=1
          if len(region_ref) == 1:
@@ -631,10 +638,9 @@ def mqqplot(insumstats,
                  palette[(i+1)*100 + j ] = hex_color
 
          edgecolor="none"
-         scatter_args["markers"]= region_marker_shapes[:len(region_ref)]
+         scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:len(region_ref)])}
          style="SHAPE"
-
-
+
 
      ## if highlight
      highlight_i = pd.DataFrame()
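The markers change matters because the SHAPE column holds integer codes starting at 1, and seaborn's scatterplot accepts a dict mapping each style level to a marker. A small hedged sketch of the pattern; the data, marker list, and reference variants are invented:

import pandas as pd
import seaborn as sns

region_marker_shapes = ["o", "^", "s"]   # illustrative marker list
region_ref = ["rs111", "rs222"]          # two reference variants -> two style levels

df = pd.DataFrame({"x": [1, 2, 3, 4],
                   "y": [0.5, 1.0, 1.5, 2.0],
                   "SHAPE": [1, 1, 2, 2]})   # style codes start at 1, as in the diff

# Map style level -> marker, mirroring the dict comprehension in the hunk above.
markers = {(i + 1): m for i, m in enumerate(region_marker_shapes[:len(region_ref)])}
sns.scatterplot(data=df, x="x", y="y", style="SHAPE", markers=markers)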
@@ -977,6 +983,7 @@ def mqqplot(insumstats,
              region=region,
              region_anno_bbox_args=region_anno_bbox_args,
              skip=skip,
+             anno_height=anno_height,
              snpid=snpid,
              chrom=chrom,
              pos=pos,
@@ -1040,7 +1047,7 @@ def mqqplot(insumstats,
      if "qq" in mode:
          ax2.set_ylim(ylim)
 
-     ax1 = _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats)
+     ax1 = _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats, pos, chrpad, xtight, log = log, verbose=verbose)
 
      # Titles
      if title and anno and len(to_annotate)>0:
@@ -1065,20 +1072,34 @@ def mqqplot(insumstats,
 
 
 
- def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats):
+ def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats, pos, chrpad, xtight, log, verbose):
 
-     if ax1 is not None:
-         xmin, xmax = ax1.get_xlim()
-
-         if xpad is not None:
-             pad = xpad* sumstats["i"].max()
-             ax1.set_xlim([xmin - pad, xmin + pad])
-         if xpadl is not None:
-             pad = xpadl* sumstats["i"].max()
-             ax1.set_xlim([xmin - pad,xmax])
-         if xpadr is not None:
-             pad = xpadr* sumstats["i"].max()
-             ax1.set_xlim([xmin, xmax + pad])
+     if xtight==True:
+         log.write(" -Adjusting X padding on both side : tight mode", verbose=verbose)
+         xmax = sumstats["i"].max()
+         xmin= sumstats["i"].min()
+         ax1.set_xlim([xmin, xmax])
+
+     else:
+         chrpad_to_remove = sumstats[pos].max()*chrpad
+         if ax1 is not None:
+             xmin, xmax = ax1.get_xlim()
+             length = xmax - xmin
+
+             if xpad is not None:
+                 log.write(" -Adjusting X padding on both side: {}".format(xpad), verbose=verbose)
+                 pad = xpad* length #sumstats["i"].max()
+                 ax1.set_xlim([xmin - pad + chrpad_to_remove, xmax + pad - chrpad_to_remove])
+             if xpad is None and xpadl is not None:
+                 log.write(" -Adjusting X padding on left side: {}".format(xpadl), verbose=verbose)
+                 xmin, xmax = ax1.get_xlim()
+                 pad = xpadl*length # sumstats["i"].max()
+                 ax1.set_xlim([xmin - pad + chrpad_to_remove ,xmax])
+             if xpad is None and xpadr is not None:
+                 log.write(" -Adjusting X padding on right side: {}".format(xpadr), verbose=verbose)
+                 xmin, xmax = ax1.get_xlim()
+                 pad = xpadr*length # sumstats["i"].max()
+                 ax1.set_xlim([xmin, xmax + pad - chrpad_to_remove])
 
      return ax1
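In the rewritten helper, padding is now a fraction of the current axis span rather than of the largest x position, and the built-in chromosome margin (chrpad) is subtracted back out. A tiny hedged sketch of the arithmetic with made-up numbers:

# Illustrative numbers only: an axis spanning 0..1000 with chrpad = 0.03.
xmin, xmax = 0.0, 1000.0
chrpad = 0.03
pos_max = 1000.0                      # stand-in for sumstats[pos].max()
xpad = 0.1

length = xmax - xmin                  # 1000
chrpad_to_remove = pos_max * chrpad   # 30
pad = xpad * length                   # 100

new_xlim = [xmin - pad + chrpad_to_remove, xmax + pad - chrpad_to_remove]
# -> [-70.0, 1070.0]: 10% padding on each side, minus the built-in chrpad margin.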
@@ -5,6 +5,7 @@ import matplotlib.patches as patches
  import seaborn as sns
  import numpy as np
  import copy
+ import re
  import scipy as sp
  from pyensembl import EnsemblRelease
  from allel import GenotypeArray
@@ -96,19 +97,20 @@ def _plot_regional(
              marker_size= marker_size,
              region_marker_shapes=region_marker_shapes,
              log=log,verbose=verbose)
-         if lead_id_single is not None:
-             lead_ids.append(lead_id_single)
+         #if lead_id_single is not None:
+         lead_ids.append(lead_id_single)
 
      # update region_ref to variant rsID or variantID / skip NAs
      new_region_ref = []
      for i in range(len(lead_ids)):
          if lead_ids[i] is None:
+             new_region_ref.append(region_ref[i])
              continue
          if region_ref[i] is None:
-             if "rsID" in sumstats.columns:
-                 new_name = sumstats.loc[lead_ids[i],"rsID"]
-             elif "SNPID" in sumstats.columns:
+             if "SNPID" in sumstats.columns:
                  new_name = sumstats.loc[lead_ids[i],"SNPID"]
+             elif "rsID" in sumstats.columns:
+                 new_name = sumstats.loc[lead_ids[i],"rsID"]
              else:
                  new_name = "chr{}:{}".format(sumstats.loc[lead_ids[i],"CHR"] , sumstats.loc[lead_ids[i],"POS"])
          new_region_ref.append(new_name)
@@ -162,7 +164,6 @@ def _plot_regional(
      lead_snp_ys = []
      lead_snp_is = []
      lead_snp_is_colors = []
-
      for i,lead_id_single in enumerate(lead_ids):
          if lead_id_single is not None:
              lead_snp_ys.append(sumstats.loc[lead_id_single,"scaled_P"] )
@@ -258,11 +259,35 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
      if type(lead_id) is list:
          if len(lead_id)>0:
              lead_id = int(lead_id[0])
-
+
      if region_ref_to_check is not None:
          if type(lead_id) is list:
              if len(lead_id)==0 :
-                 log.warning("{} not found.. Skipping..".format(region_ref_to_check))
+                 #try:
+                 matched_snpid = re.match("(chr)?[0-9]+:[0-9]+:[ATCG]+:[ATCG]+", region_ref_to_check, re.IGNORECASE)
+                 if matched_snpid is None:
+                     pass
+                 else:
+                     lead_snpid = matched_snpid.group(0).split(":")
+                     if len(lead_snpid)==4:
+                         lead_chr= int(lead_snpid[0])
+                         lead_pos= int(lead_snpid[1])
+                         lead_ea= lead_snpid[2]
+                         lead_nea= lead_snpid[3]
+                         chrpos_match = (sumstats["CHR"] == lead_chr) & (sumstats["POS"] == lead_pos)
+                         eanea_match = ((sumstats["EA"] == lead_ea) & (sumstats["NEA"] == lead_nea)) | ((sumstats["EA"] == lead_nea) & (sumstats["NEA"] == lead_ea))
+                         if "rsID" in sumstats.columns:
+                             lead_id = sumstats.index[chrpos_match&eanea_match].to_list()
+                         if "SNPID" in sumstats.columns:
+                             lead_id = sumstats.index[chrpos_match&eanea_match].to_list()
+                         if type(lead_id) is list:
+                             if len(lead_id)>0:
+                                 lead_id = int(lead_id[0])
+                                 log.warning("Trying matching variant {} using CHR:POS:EA:NEA to {}... ".format(region_ref_to_check,lead_id))
+
+                 if type(lead_id) is list:
+                     if len(lead_id)==0 :
+                         log.warning("Extracting variant: {} not found in sumstats.. Skipping..".format(region_ref_to_check))
                  #lead_id = sumstats["scaled_P"].idxmax()
                  lead_id = None
      return lead_id
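The fallback above lets a region_ref given in CHR:POS:EA:NEA form (with or without a chr prefix) be resolved even when it is not present as a SNPID or rsID in the sumstats. A small self-contained sketch of the matching step with an invented variant ID:

import re

region_ref_to_check = "7:127253550:A:G"   # invented example ID in CHR:POS:EA:NEA form

matched_snpid = re.match("(chr)?[0-9]+:[0-9]+:[ATCG]+:[ATCG]+", region_ref_to_check, re.IGNORECASE)
if matched_snpid is not None:
    lead_snpid = matched_snpid.group(0).split(":")    # ['7', '127253550', 'A', 'G']
    lead_chr = int(lead_snpid[0])                     # 7
    lead_pos = int(lead_snpid[1])                     # 127253550
    lead_ea, lead_nea = lead_snpid[2], lead_snpid[3]  # 'A', 'G'
    # Rows are then matched on CHR/POS plus EA/NEA in either orientation,
    # as the eanea_match expression in the hunk above does.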
@@ -275,7 +300,7 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
 
      return lead_id
 
- def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbose,region_marker_shapes):
+ def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbose, region_marker_shapes):
 
      if region_ref is None:
          log.write(" -Extracting lead variant..." , verbose=verbose)
@@ -416,6 +441,11 @@ def _plot_gene_track(
      texts_to_adjust_left = []
      texts_to_adjust_middle = []
      texts_to_adjust_right = []
+
+
+     sig_gene_names=[]
+     sig_gene_lefts=[]
+     sig_gene_rights=[]
      for index,row in uniq_gene_region.iterrows():
 
          gene_color="#020080"
@@ -426,21 +456,18 @@ def _plot_gene_track(
              gene_anno = "<-" + row["name"]
 
 
-         sig_gene_names=[]
-         sig_gene_lefts=[]
-         sig_gene_rights=[]
+
          for lead_snp_i in lead_snp_is:
              if region_lead_grid is True and lead_snp_i > gene_track_start_i+row["start"] and lead_snp_i < gene_track_start_i+row["end"] :
-                 gene_color=region_lead_grid_line["color"]
-                 sig_gene_names.append(row["name"])
-                 sig_gene_lefts.append(gene_track_start_i+row["start"])
-                 sig_gene_rights.append(gene_track_start_i+row["end"])
+                 gene_color=region_lead_grid_line["color"]
+                 sig_gene_names.append(row["name"])
+                 sig_gene_lefts.append(gene_track_start_i+row["start"])
+                 sig_gene_rights.append(gene_track_start_i+row["end"])
 
          # plot gene line
          ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
                   (row["stack"]*2,row["stack"]*2),color=gene_color,linewidth=linewidth_in_points/10)
 
-
          # plot gene name
          if row["end"] >= region[2]:
              #right side
@@ -459,6 +486,7 @@ def _plot_gene_track(
      for index,row in exons.iterrows():
          exon_color="#020080"
          for sig_gene_name, sig_gene_left, sig_gene_right in zip(sig_gene_names,sig_gene_lefts,sig_gene_rights):
+
              if not pd.isnull(row["name"]):
                  if (region_lead_grid is True) and row["name"]==sig_gene_name:
                      exon_color = region_lead_grid_line["color"]
@@ -468,7 +496,7 @@ def _plot_gene_track(
                      exon_color = region_lead_grid_line["color"]
                  else:
                      exon_color="#020080"
-
+
      ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
               (row["stack"]*2,row["stack"]*2),linewidth=linewidth_in_points*taf[3],color=exon_color,solid_capstyle="butt")
 
@@ -550,24 +578,42 @@ def process_vcf(sumstats,
      # figure out lead variant
      lead_id = _get_lead_id(sumstats, region_ref_single, log, verbose)
 
-     if lead_id is None:
-         sumstats[rsq] = None
-         sumstats[rsq] = sumstats[rsq].astype("float")
-         sumstats[ld_single] = 0
-         continue
 
-     lead_pos = sumstats.loc[lead_id,pos]
+     lead_series = None
+     if lead_id is None:
+
+         matched_snpid = re.match("(chr)?[0-9]+:[0-9]+:[ATCG]+:[ATCG]+",region_ref_single, re.IGNORECASE)
+
+         if matched_snpid is None:
+             sumstats[rsq] = None
+             sumstats[rsq] = sumstats[rsq].astype("float")
+             sumstats[ld_single] = 0
+             continue
+         else:
+
+             lead_snpid = matched_snpid.group(0).split(":")[1:]
+             lead_pos = int(lead_snpid[0])
+             lead_snpid[0]= int(lead_snpid[0])
+             lead_series = pd.Series(lead_snpid)
+     else:
+         lead_pos = sumstats.loc[lead_id,pos]
 
+
      # if lead pos is available:
      if lead_pos in ref_genotype["variants/POS"]:
 
          # get ref index for lead snp
-         lead_snp_ref_index = match_varaint(sumstats.loc[lead_id,[pos,nea,ea]])
-         #lead_snp_ref_index = np.where(ref_genotype["variants/POS"] == lead_pos)[0][0]
+         if lead_series is None:
+             lead_snp_ref_index = match_varaint(sumstats.loc[lead_id,[pos,nea,ea]])
+             #lead_snp_ref_index = np.where(ref_genotype["variants/POS"] == lead_pos)[0][0]
+         else:
+             log.warning("Computing LD: {} not found in sumstats but found in reference...Still Computing...".format(region_ref_single))
+             lead_snp_ref_index = match_varaint(lead_series)
 
          # non-na other snp index
          other_snps_ref_index = sumstats["REFINDEX"].dropna().astype("int").values
          # get genotype
+
          lead_snp_genotype = GenotypeArray([ref_genotype["calldata/GT"][lead_snp_ref_index]]).to_n_alt()
          try:
              if len(set(lead_snp_genotype[0]))==1:
@@ -604,10 +650,10 @@ def process_vcf(sumstats,
          sumstats.loc[to_change_color,ld_single] = 1
          to_change_color = sumstats[rsq]>ld_threshold
          sumstats.loc[to_change_color,ld_single] = index+2
-
-     sumstats.loc[lead_id,ld_single] = len(region_ld_threshold)+2
 
-     sumstats.loc[lead_id,lead] = 1
+     if lead_series is None:
+         sumstats.loc[lead_id,ld_single] = len(region_ld_threshold)+2
+         sumstats.loc[lead_id,lead] = 1
 
      ####################################################################################################
      final_shape_col = "SHAPE"