offtracker 2.7.7__zip → 2.7.10__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. offtracker-2.7.10/PKG-INFO +189 -0
  2. offtracker-2.7.10/README.md +177 -0
  3. offtracker-2.7.10/offtracker/X_offplot.py +539 -0
  4. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/X_offtracker.py +2 -1
  5. offtracker-2.7.10/offtracker/_version.py +30 -0
  6. offtracker-2.7.10/offtracker.egg-info/PKG-INFO +189 -0
  7. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker.egg-info/SOURCES.txt +2 -1
  8. {offtracker-2.7.7 → offtracker-2.7.10}/scripts/offtracker_analysis.py +18 -9
  9. offtracker-2.7.10/scripts/offtracker_plot.py +39 -0
  10. {offtracker-2.7.7 → offtracker-2.7.10}/setup.py +5 -2
  11. offtracker-2.7.7/PKG-INFO +0 -146
  12. offtracker-2.7.7/README.md +0 -134
  13. offtracker-2.7.7/offtracker/X_offplot.py +0 -123
  14. offtracker-2.7.7/offtracker/_version.py +0 -27
  15. offtracker-2.7.7/offtracker.egg-info/PKG-INFO +0 -146
  16. {offtracker-2.7.7 → offtracker-2.7.10}/LICENSE.txt +0 -0
  17. {offtracker-2.7.7 → offtracker-2.7.10}/MANIFEST.in +0 -0
  18. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/X_sequence.py +0 -0
  19. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/__init__.py +0 -0
  20. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/1.1_bed2fr_v4.5.py +0 -0
  21. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/1.3_bdg_normalize_v4.0.py +0 -0
  22. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/Snakefile_offtracker +0 -0
  23. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/bedGraphToBigWig +0 -0
  24. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/hg38.chrom.sizes +0 -0
  25. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/mm10.chrom.sizes +0 -0
  26. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/offtracker_blacklist_hg38.merged.bed +0 -0
  27. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/offtracker_blacklist_mm10.merged.bed +0 -0
  28. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker.egg-info/dependency_links.txt +0 -0
  29. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker.egg-info/requires.txt +0 -0
  30. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker.egg-info/top_level.txt +0 -0
  31. {offtracker-2.7.7 → offtracker-2.7.10}/scripts/offtracker_candidates.py +0 -0
  32. {offtracker-2.7.7 → offtracker-2.7.10}/scripts/offtracker_config.py +0 -0
  33. {offtracker-2.7.7 → offtracker-2.7.10}/setup.cfg +0 -0
@@ -0,0 +1,539 @@
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import matplotlib.patches as patches
6
+ from matplotlib import rcParams
7
# Module-wide matplotlib defaults. Updating the imported ``rcParams`` object
# is the same as updating plt.rcParams or matplotlib.rcParams.
dict_rc = dict([
    ('pdf.fonttype', 42),
    ('font.family', ['Arial']),
])
rcParams.update(dict_rc)
13
+
14
+ # 2024.06.03. offtable 添加 threshold 分界线,默认为 None,常用的是 2
15
def offtable(offtargets, target_guide, length_pam = 3,
             col_seq='best_target', col_score='track_score', col_mismatch='mismatch', col_loc='target_location',
             title=None, font='Arial', font_size=9,
             box_size_x=15, box_size_y=20, box_gap=1, threshold=None,
             x_offset=15, y_offset=35, dpi=300, savefig=None):
    """Draw a table that aligns every off-target sequence under the guide.

    The guide is drawn as one row of coloured boxes.  Each off-target gets a
    row below it: positions equal to the guide are shown as "." on white,
    positions where the guide is "N" are shown on white with their letter,
    and all other positions are shown with their letter on the base colour.
    The rounded score and the location text are written right of each row.

    Parameters
    ----------
    offtargets : pandas.DataFrame or list of dict
        One record per off-target.  Each record must provide ``col_seq``,
        ``col_score`` and ``col_loc``.  The records are not modified.
    target_guide : str
        Reference sequence drawn in the first row.
    length_pam : int
        Number of trailing positions of ``target_guide`` that are separated
        from the rest by a dashed vertical line.
    col_seq, col_score, col_loc : str
        Record keys of the aligned sequence, the score and the location.
    col_mismatch : str
        Currently unused; kept so existing callers keep working.
    title : str or None
        Text drawn above the table; "Off-targets table" when None.
    font, font_size
        Font family and size of the table text.
    box_size_x, box_size_y, box_gap
        Width and height of one base box and the vertical gap between rows,
        in data units.
    threshold : float or None
        When given, a dashed horizontal line is drawn below as many rows as
        there are records with ``score >= threshold``.
        NOTE(review): this only separates positives from negatives if the
        records are sorted by descending score - confirm with callers.
    x_offset, y_offset
        Position of the top-left corner of the guide row.
    dpi : int
        Resolution of the figure and of the saved file.
    savefig : str or None
        Path to save the figure to; nothing is saved when None.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the table was drawn on.
    """
    # Face colour of each base; anything not listed is drawn in purple.
    color_dict = {
        'A': 'lightgreen',
        'T': 'lightblue',
        'C': 'lightcoral',
        'G': 'lightgoldenrodyellow',
        'N': 'lightgrey',
        '—': 'orange',
        '-': 'orange'
    }

    # Normalise the input to a list of record dictionaries.
    if isinstance(offtargets, pd.DataFrame):
        offtargets = offtargets.to_dict(orient='records')

    # Count the rows above the threshold line for every supported input type.
    # (Previously this was only computed for DataFrame input, so a list of
    # dicts combined with a threshold raised NameError when drawing the line.)
    n_positive = None
    if threshold is not None:
        n_positive = sum(1 for record in offtargets if record[col_score] >= threshold)

    width = box_size_x * (len(target_guide) + 15)
    height = y_offset + (len(offtargets) + 2) * (box_size_y + box_gap)
    fig = plt.figure(figsize=(width / 100.0, height / 100.0), dpi=dpi)
    ax = fig.add_subplot(111)

    # Title
    ax.text(x_offset, 25, "Off-targets table" if title is None else f"{title}", fontsize=14, family=font)

    # Reference (guide) row
    for i, c in enumerate(target_guide):
        x = x_offset + i * box_size_x
        y = y_offset
        base_color = color_dict.get(c, 'purple')
        ax.add_patch(patches.Rectangle((x, y), box_size_x, box_size_y, facecolor=base_color))
        ax.text(x + box_size_x / 2, y + box_size_y / 2, c, ha='center', va='center', family=font, fontsize=font_size)

    # Column headers
    ax.text(x_offset + (len(target_guide) + 2) * box_size_x, y_offset + box_size_y / 4, 'Track\nScore', ha='center', va='center', family=font, fontsize=font_size*1.1)
    ax.text(x_offset + (len(target_guide) + 4) * box_size_x, y_offset + box_size_y / 2, 'Coordinates', ha='left', va='center', family=font, fontsize=font_size*1.1)

    # Off-target rows.
    # Known limitation: a sequence containing an insertion is longer than the
    # guide and cannot be drawn as is, so it is trimmed to the guide length.
    for j, seq in enumerate(offtargets):
        y = y_offset + (j + 1) * (box_size_y + box_gap)
        # Work on a local copy so the caller's records are left untouched.
        aligned = seq[col_seq]
        len_out = len(aligned) - len(target_guide)
        if len_out > 0:
            if len_out > 1:
                print(f"Warning: {aligned} is {len_out} longer than {target_guide}")
            # Drop the extra bases either from the start or from the end,
            # whichever leaves the smaller Hamming distance to the guide
            # (ties keep the start and drop the end).
            delete_first = aligned[len_out:]
            delete_last = aligned[:-len_out]
            hamming_first = sum(a != b for a, b in zip(delete_first, target_guide))
            hamming_last = sum(a != b for a, b in zip(delete_last, target_guide))
            aligned = delete_first if hamming_first < hamming_last else delete_last
        elif len_out < 0:
            print(f"Warning: {aligned} is {-len_out} shorter than {target_guide}")

        for i, c in enumerate(aligned):
            # The minus sign is too short to read as a gap, so draw an em dash.
            if c == '-':
                c = '—'
            x = x_offset + i * box_size_x
            same_as_guide = c == target_guide[i]
            if same_as_guide or target_guide[i] == 'N':
                face = 'white'
            else:
                face = color_dict.get(c, 'purple')
            ax.add_patch(patches.Rectangle((x, y), box_size_x, box_size_y, facecolor=face))
            ax.text(x + box_size_x / 2, y + box_size_y / 2, "." if same_as_guide else c, ha='center', va='center', family=font, fontsize=font_size, weight='bold')

        # Score and location of this row
        ax.text(x_offset + (len(target_guide) + 2) * box_size_x, y + box_size_y / 2, round(seq[col_score],2), ha='center', va='center', family=font, fontsize=font_size)
        ax.text(x_offset + (len(target_guide) + 4) * box_size_x, y + box_size_y / 2, seq[col_loc], ha='left', va='center', family=font, fontsize=font_size)

    # Dashed vertical line in front of the last ``length_pam`` positions
    x_line = x_offset + (len(target_guide) - length_pam) * box_size_x
    y_start = y_offset
    y_end = y_start + (len(offtargets)+1) * (box_size_y + box_gap)
    ax.vlines(x=x_line, ymin=y_start, ymax=y_end, color='indianred', linestyle='--')

    # Dashed horizontal line below the last row counted as positive
    if threshold is not None:
        thresh_x_start = x_offset
        thresh_x_end = x_offset + len(target_guide) * box_size_x
        thresh_y = y_offset + (n_positive+1) * (box_size_y + box_gap) - box_gap*0.5
        ax.hlines(y=thresh_y, xmin=thresh_x_start, xmax=thresh_x_end, color='orange', linestyle='--')

    # The location text is long, so leave extra room on the right.
    ax.set_xlim(0, width*1.1)
    ax.set_ylim(height, 0)
    ax.axis('off')

    # Let the axes fill the whole figure (does not change the font size).
    fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
    if savefig is not None:
        fig.savefig(savefig, dpi=dpi)
    plt.show()
    return ax
138
+
139
+ # summary_method: mean (default) or average, max, min, stdev, dev, coverage, cov or sum.
140
+ # number_of_bins: 700 (default) or any integer above 1
141
+ # 设置 bin_size 可以用来自动调整 number_of_bins,但是如果 **properties 里有 number_of_bins,就会被覆盖
142
def igv_tracking(location, file_fw, file_rv, track_name='', track_name_loc='left',
                 fig=None, track_position = 0, track_gap = 0.2, single_height=1, bin_size=None,
                 fig_scale = 0.5, aspect_ratio = 5, ax_gap = 0.02, show_title=True, spine_width=0.5,
                 track_color='red', ex_length = 10000, set_ymax_fw = None, set_ymin_rv = None,
                 min_ymax = None,
                 savefig=None, savedpi=200, **properties):
    """Plot a forward/reverse pair of bigWig tracks for one genomic region.

    Intended for Tracking-seq bigWig files: the forward file is drawn on an
    upper axes with y-range ``[0, ymax]`` and the reverse file on a lower
    axes with y-range ``[ymin, 0]``.

    Parameters
    ----------
    location : str
        Region as ``"chrom:start-end"``; commas are removed before parsing.
    file_fw, file_rv : str
        Paths of the forward and reverse bigWig files.
    track_name : str
        Label written next to the track.
    track_name_loc : str
        ``'left'`` puts the label left of the track, anything else right.
    fig : matplotlib.figure.Figure or None
        Figure to add the axes to; a new one is created when None.
    track_position : float
        Top of this track in figure coordinates.  A non-zero value disables
        the title, because only the first track of a stack should carry it.
    track_gap, single_height, ax_gap : float
        Padding above/below the pair, height of one axes and the gap between
        the two axes.
    bin_size : int or None
        When given, ``number_of_bins`` is set to region length // bin_size.
    fig_scale, aspect_ratio, spine_width
        Figure size, axes width and line width of the shared baseline.
    show_title : bool
        Write the plotted region and its length above the forward axes.
    track_color : str
        Colour used for positive and negative values.
    ex_length : int
        Number of bases by which the region is extended on both sides.
    set_ymax_fw, set_ymin_rv : float or None
        Fixed y-limits; when falsy the limits produced by plotting are used.
    min_ymax : float or None
        Lower bound for the automatic ``ymax`` (and ``-min_ymax`` an upper
        bound for the automatic ``ymin``).
    savefig : str or None
        Path to save ``fig`` to.
    savedpi : int
        Resolution of the saved file.
    **properties
        Copied onto the track properties last, so they override the values
        set here (including ``number_of_bins`` derived from ``bin_size``).

    Returns
    -------
    (fig, track_position)
        The figure and the bottom position of this track, to be passed as
        ``track_position`` when stacking the next track.
    """
    # Function-scope import: pygenometracks is only needed when this runs.
    import pygenometracks.tracks as pygtk

    # When stacking, later calls pass a non-zero track_position.
    if track_position != 0:
        show_title = False

    if fig is None:
        fig = plt.figure(figsize=(fig_scale, fig_scale))

    track_height = 2*single_height + track_gap*2 + ax_gap
    track_position = track_position - track_height
    fw_ax = fig.add_axes([0, track_position + track_gap + single_height + ax_gap, aspect_ratio, single_height])
    rv_ax = fig.add_axes([0, track_position + track_gap, aspect_ratio, single_height])

    location = location.replace(',', '')
    chrom = location.split(':')[0]
    start_region, end_region = location.split(':')[1].split('-')
    start_region = int(start_region) - ex_length
    end_region = int(end_region) + ex_length

    def _draw_bigwig(bw_file, ax):
        # Configure one BigWigTrack and draw it on ``ax``.
        track = pygtk.BigWigTrack(dict(file=bw_file))
        track.properties['color'] = track_color
        track.properties['negative_color'] = track_color
        if bin_size is not None:
            track.properties['number_of_bins'] = (end_region - start_region)//bin_size
        for key, value in properties.items():
            track.properties[key] = value
        track.plot(ax, chrom, start_region, end_region)

    def _show_only_spine(ax, side):
        # Hide all spines and ticks except the spine shared by both axes.
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.spines[side].set_visible(True)
        ax.spines[side].set_linewidth(fig_scale*spine_width)
        ax.tick_params(bottom=False, labelbottom=False, left=False, labelleft=False)

    # Forward strand
    _draw_bigwig(file_fw, fw_ax)
    ymax_fw = fw_ax.get_ylim()[1]
    print('ymax_fw',ymax_fw)
    if set_ymax_fw:
        real_ymax_fw = set_ymax_fw
    else:
        if min_ymax is not None:
            ymax_fw = max(ymax_fw, min_ymax)
        real_ymax_fw = ymax_fw
    fw_ax.set_ylim(0, real_ymax_fw)
    fw_ax.set_xlim(start_region, end_region)
    _show_only_spine(fw_ax, 'bottom')

    # Reverse strand
    _draw_bigwig(file_rv, rv_ax)
    ymin_rv = rv_ax.get_ylim()[0]
    print('ymin_rv',ymin_rv)
    if set_ymin_rv:
        real_ymin_rv = set_ymin_rv
    else:
        if min_ymax is not None:
            ymin_rv = min(ymin_rv, -min_ymax)
        real_ymin_rv = ymin_rv
    rv_ax.set_ylim(real_ymin_rv, 0)
    rv_ax.set_xlim(start_region, end_region)
    _show_only_spine(rv_ax, 'top')

    # y-range label in the top-left corner.  With summary_method='sum' the
    # values are divided by the bin size.  Without an explicit bin_size the
    # raw limits are shown (previously this divided by None and raised
    # TypeError).
    # NOTE(review): if ``number_of_bins`` is passed via **properties it
    # overrides bin_size for plotting but not for this label - confirm.
    showed_ymax = real_ymax_fw
    showed_ymin = real_ymin_rv
    if properties.get('summary_method') == 'sum' and bin_size is not None:
        showed_ymax = real_ymax_fw/bin_size
        showed_ymin = real_ymin_rv/bin_size
    fw_ax.text(start_region, real_ymax_fw, f'{showed_ymin:.1f}-{showed_ymax:.1f}', ha='left', va='top', fontsize=fig_scale*15)

    # Track name on the left or right, centred on the shared baseline
    x_range = end_region - start_region
    x_gap = x_range*0.02
    if track_name_loc == 'left':
        fw_ax.text(start_region-x_gap, 0, track_name, ha='right', va='center', fontsize=fig_scale*20)
    else:
        fw_ax.text(end_region+x_gap, 0, track_name, ha='left', va='center', fontsize=fig_scale*20)

    print(f'{chrom}:{start_region}-{end_region}')
    if show_title:
        region_length = round((end_region - start_region)/1000, 1)
        fw_ax.set_title(f'{chrom}:{start_region}-{end_region}\n({region_length:g} kb)',loc='center',fontsize=fig_scale*20)

    if savefig is not None:
        # Save the figure that was drawn on, which need not be pyplot's
        # current figure when ``fig`` was supplied by the caller.
        fig.savefig(savefig, bbox_inches='tight', dpi=savedpi)

    return fig, track_position
255
+
256
+
257
def igv_single(location, file, fig=None, track_name='', track_name_loc='left',
               track_position = 0, track_gap = 0.2, bin_size=None,
               fig_scale = 0.5, aspect_ratio = 5, show_title=True, spine_width=0.5,
               track_color='red', ex_length = 10000, set_ymax_single = None, min_ymax=None,
               savefig=None, savedpi=200, **properties):
    """Plot one bigWig file as a single track for one genomic region.

    Parameters
    ----------
    location : str
        Region as ``"chrom:start-end"``; commas are removed before parsing.
    file : str
        Path of the bigWig file.
    fig : matplotlib.figure.Figure or None
        Figure to add the axes to; a new one is created when None.
    track_name : str
        Label written next to the track at half height.
    track_name_loc : str
        ``'left'`` puts the label left of the track, anything else right.
    track_position : float
        Top of this track in figure coordinates.  A non-zero value disables
        the title, because only the first track of a stack should carry it.
    track_gap : float
        Padding above and below the axes (the axes itself has height 1).
    bin_size : int or None
        When given, ``number_of_bins`` is set to region length // bin_size.
    fig_scale, aspect_ratio, spine_width
        Figure size, axes width and line width of the baseline.
    show_title : bool
        Write the plotted region and its length above the axes.
    track_color : str
        Colour used for positive and negative values.
    ex_length : int
        Number of bases by which the region is extended on both sides.
    set_ymax_single : float or None
        Fixed upper y-limit; when falsy the limit produced by plotting is
        used, raised to ``min_ymax`` when that is given.
    savefig : str or None
        Path to save ``fig`` to.
    savedpi : int
        Resolution of the saved file.
    **properties
        Copied onto the track properties last, so they override the values
        set here (including ``number_of_bins`` derived from ``bin_size``).

    Returns
    -------
    (fig, track_position)
        The figure and the bottom position of this track, to be passed as
        ``track_position`` when stacking the next track.
    """
    # Function-scope import: pygenometracks is only needed when this runs.
    import pygenometracks.tracks as pygtk

    # When stacking, later calls pass a non-zero track_position.
    if track_position != 0:
        show_title = False

    if fig is None:
        fig = plt.figure(figsize=(fig_scale, fig_scale))

    track_height = 1 + track_gap*2
    track_position = track_position - track_height
    single_ax = fig.add_axes([0, track_position + track_gap, aspect_ratio, 1])

    location = location.replace(',', '')
    chrom = location.split(':')[0]
    start_region, end_region = location.split(':')[1].split('-')
    start_region = int(start_region) - ex_length
    end_region = int(end_region) + ex_length

    tk_single = pygtk.BigWigTrack(dict(file=file))
    tk_single.properties['color'] = track_color
    tk_single.properties['negative_color'] = track_color
    if bin_size is not None:
        tk_single.properties['number_of_bins'] = (end_region - start_region)//bin_size
    # User-supplied properties are applied last so they take precedence.
    for key, value in properties.items():
        tk_single.properties[key] = value
    tk_single.plot(single_ax, chrom, start_region, end_region)

    ymax_single = single_ax.get_ylim()[1]
    print('ymax_single',ymax_single)
    if set_ymax_single:
        real_ymax = set_ymax_single
    else:
        if min_ymax is not None:
            ymax_single = max(ymax_single, min_ymax)
        real_ymax = ymax_single
    single_ax.set_ylim(0, real_ymax)
    ylim_middle = real_ymax/2
    single_ax.set_xlim(start_region, end_region)

    # Hide all spines and ticks except the baseline.
    for spine in single_ax.spines.values():
        spine.set_visible(False)
    single_ax.spines['bottom'].set_visible(True)
    single_ax.spines['bottom'].set_linewidth(fig_scale*spine_width)
    single_ax.tick_params(bottom=False, labelbottom=False, left=False, labelleft=False)

    # y-range label in the top-left corner.  With summary_method='sum' the
    # value is divided by the bin size.  Without an explicit bin_size the raw
    # limit is shown (previously this divided by None and raised TypeError).
    # NOTE(review): if ``number_of_bins`` is passed via **properties it
    # overrides bin_size for plotting but not for this label - confirm.
    showed_ymax = real_ymax
    if properties.get('summary_method') == 'sum' and bin_size is not None:
        showed_ymax = real_ymax/bin_size
    single_ax.text(start_region, real_ymax, f'0-{showed_ymax:.0f}', ha='left', va='top', fontsize=fig_scale*15)

    # Track name on the left or right, at half height
    x_range = end_region - start_region
    x_gap = x_range*0.02
    if track_name_loc == 'left':
        single_ax.text(start_region-x_gap, ylim_middle, track_name, ha='right', va='center', fontsize=fig_scale*20)
    else:
        single_ax.text(end_region+x_gap, ylim_middle, track_name, ha='left', va='center', fontsize=fig_scale*20)

    print(f'{chrom}:{start_region}-{end_region}')
    if show_title:
        region_length = round((end_region - start_region)/1000, 1)
        single_ax.set_title(f'{chrom}:{start_region}-{end_region}\n({region_length:g} kb)',loc='center',fontsize=fig_scale*20)

    if savefig is not None:
        # Save the figure that was drawn on, which need not be pyplot's
        # current figure when ``fig`` was supplied by the caller.
        fig.savefig(savefig, bbox_inches='tight', dpi=savedpi)

    return fig, track_position
339
+
340
+
341
+ from statsmodels.nonparametric.smoothers_lowess import lowess
342
def signal_length(df_bdg_chr, chrom, cleavage_site, end='end',start='start',value='residual',
                  flank_max=100000, bin_size=100, window_size=3000,signal_threshold = 0.3, show_plot=False, savefig=None, save_dpi=100):
    """Estimate how far the signal extends on each side of a cleavage site.

    The binned values on each flank are smoothed twice with LOWESS.  On the
    left flank the signal is taken to start at the last bin whose smoothed
    value is below ``signal_threshold``; on the right flank it is taken to
    end at the first bin whose smoothed value is above ``-signal_threshold``.
    If that bin is the one adjacent to the cleavage site the smoothed values
    are negated and the search is repeated (possibly a one-sided signal with
    the opposite sign).

    Parameters
    ----------
    df_bdg_chr : pandas.DataFrame
        Binned signal with a ``'chr'`` column and the columns named by
        ``start``, ``end`` and ``value``.
    chrom : str
        Chromosome to analyse.
    cleavage_site : int
        Position separating the left and the right flank.
    end, start, value : str
        Column names of the bin end, bin start and signal value.
    flank_max : int
        Maximum distance from the cleavage site that is analysed.
    bin_size : int
        Width of one bin; used to convert bin counts to lengths.
    window_size : int
        Smoothing window; ``frac = window_size / (bins * bin_size)``.
    signal_threshold : float
        Absolute smoothed value below which a bin counts as background.
    show_plot : bool
        Draw both flanks with the smoothed curve and the length cutoff.
    savefig : str or None
        Path to save the plot to (only used together with ``show_plot``).
    save_dpi : int
        Resolution of the saved plot.

    Returns
    -------
    tuple
        ``(length_L, length_R, lowess_smoothed_L, lowess_smoothed_R, y_L,
        y_R)`` where the lengths are ``n_bins * bin_size / 1000``.
    """
    df_bdg_chr = df_bdg_chr[df_bdg_chr['chr']==chrom]

    ## left
    # Bins ending within ``flank_max`` before the cleavage site
    df_bdg_chr_L = df_bdg_chr[ (df_bdg_chr[end] >= cleavage_site-flank_max) & (df_bdg_chr[end]<=cleavage_site) ].copy()
    y_L = df_bdg_chr_L[value]
    n_bins_L = len(y_L)
    x_L = np.arange(n_bins_L)
    # The left bin count is also used for the right flank.
    bins = n_bins_L
    # Express the smoothing window as a fraction of the flank
    frac = window_size/(bins*bin_size)
    lowess_smoothed_L = lowess(y_L[-bins:], x_L[-bins:], frac=frac)
    lowess_smoothed_L = lowess(lowess_smoothed_L[:, 1], lowess_smoothed_L[:, 0], frac=frac)
    # Index of the last bin below the threshold
    bool_L = lowess_smoothed_L[:,1]<signal_threshold
    index_L = np.where(bool_L)[0][-1]
    if index_L == (bins-1):
        # Possibly a one-sided signal: retry with the sign reversed
        lowess_smoothed_L_reverse = -lowess_smoothed_L[:,1]
        bool_L = lowess_smoothed_L_reverse<signal_threshold
        index_L = np.where(bool_L)[0][-1]
    # Allow for the smoothing by making the signal one bin longer
    index_L = index_L - 1
    signal_L = lowess_smoothed_L[index_L+1:,1]
    length_L = (len(signal_L)*bin_size)/1000
    max_signal_L = y_L.max()
    y_max_L = max_signal_L*1.2
    # Use the configured column names rather than fixed column positions.
    left_region = f'{chrom}:{df_bdg_chr_L[start].iloc[0]}-{df_bdg_chr_L[end].iloc[-1]}'

    ## right
    # Bins starting within ``flank_max`` after the cleavage site
    df_bdg_chr_R = df_bdg_chr[ (df_bdg_chr[start] <= cleavage_site+flank_max) & (df_bdg_chr[start]>=cleavage_site) ].copy()
    y_R = df_bdg_chr_R[value]
    n_bins_R = len(y_R)
    x_R = np.arange(n_bins_R)
    frac = window_size/(bins*bin_size)
    lowess_smoothed_R = lowess(y_R[:bins], x_R[:bins], frac=frac)
    lowess_smoothed_R = lowess(lowess_smoothed_R[:, 1], lowess_smoothed_R[:, 0], frac=frac)
    # Index of the first bin above the negative threshold
    bool_R = lowess_smoothed_R[:,1]>-signal_threshold
    index_R = np.where(bool_R)[0][0]
    if index_R == 0:
        # Possibly a one-sided signal: retry with the sign reversed
        lowess_smoothed_R_reverse = -lowess_smoothed_R[:,1]
        bool_R = lowess_smoothed_R_reverse>-signal_threshold
        index_R = np.where(bool_R)[0][0]
    # Allow for the smoothing by making the signal one bin longer
    index_R = index_R + 1
    signal_R = lowess_smoothed_R[:index_R,1]
    length_R = (len(signal_R)*bin_size)/1000
    min_signal_R = y_R.min()
    y_min_R = min_signal_R*1.2
    right_region = f'{chrom}:{df_bdg_chr_R[start].iloc[0]}-{df_bdg_chr_R[end].iloc[-1]}'

    if show_plot:
        fig = plt.figure(figsize=(10, 3))
        ax1 = fig.add_axes([0.0, 0.1, 0.5, 0.8])
        ax2 = fig.add_axes([0.5, 0.1, 0.5, 0.8])

        # One tick every 10 kb; the labels are distances to the cleavage site.
        xtick_gap = 10000/bin_size
        n_xticks = int(np.ceil(bins/xtick_gap))
        xticks = np.arange(0,n_xticks+1)*xtick_gap
        xticks_label = np.arange(0,n_xticks+1)*10

        # plot left (the cleavage site is at the right edge)
        ax1.plot(range(bins), y_L[-bins:], label='Original')
        ax1.plot(range(bins), lowess_smoothed_L[-bins:, 1], label='LOWESS', color='red')
        ax1.plot([0,bins],[0,0],label='zero',color='black')
        ax1.plot([0,bins],[signal_threshold,signal_threshold],label='threshold_left',color='orange')
        ax1.plot([0,bins],[-signal_threshold,-signal_threshold],label='threshold_right',color='orange')
        ax1.plot([index_L+1,index_L+1],[y_min_R,y_max_L],label='length cutoff',color='orange')
        ax1.set_ylim(y_min_R,y_max_L)
        ax1.set_xlim(-1,bins+1)
        ax1.set_xlabel('distance to cleavage site (kb)')
        ax1.set_title(left_region)
        # The cutoff is annotated as text instead of an extra tick, because a
        # tick could overlap the regular ones.
        ax1.text(index_L-3, 3, f'{length_L:g} kb', ha='right', va='top')
        ax1.set_xticks(xticks)
        _ = ax1.set_xticklabels([f'{x:g}' for x in np.flip(xticks_label)])
        ax1.set_ylabel('signal difference\n(coverage per 10M reads)')

        # plot right (the cleavage site is at the left edge).  The x values
        # follow the actual number of right-flank bins, which can be smaller
        # than ``bins``; plotting against range(bins) failed in that case.
        y_R_shown = y_R[:bins]
        smoothed_R_shown = lowess_smoothed_R[:bins, 1]
        ax2.plot(np.arange(len(y_R_shown)), y_R_shown, label='Original')
        ax2.plot(np.arange(len(smoothed_R_shown)), smoothed_R_shown, label='LOWESS', color='red')
        ax2.plot([0,bins],[0,0],label='zero',color='black')
        ax2.plot([0,bins],[signal_threshold,signal_threshold],label='threshold_left',color='orange')
        ax2.plot([0,bins],[-signal_threshold,-signal_threshold],label='threshold_right',color='orange')
        ax2.plot([index_R,index_R],[y_min_R,y_max_L],label='length cutoff',color='orange')
        ax2.set_ylim(y_min_R,y_max_L)
        ax2.set_xlim(-1,bins+1)
        ax2.set_xlabel('distance to cleavage site (kb)')
        ax2.set_title(right_region)
        ax2.text(index_R+4, -3, f'{length_R:g} kb', ha='left', va='bottom')
        ax2.set_xticks(xticks)
        _ = ax2.set_xticklabels([f'{x:g}' for x in xticks_label])

        # The two panels touch, so the right one carries no y axis.
        ax2.set_yticks([])
        ax2.set_yticklabels([])
        ax2.set_ylabel('')
        if savefig is not None:
            fig.savefig(savefig, dpi=save_dpi, bbox_inches='tight')
        plt.show()
    return length_L, length_R, lowess_smoothed_L, lowess_smoothed_R, y_L, y_R
464
+
465
+
466
def tracking_plot(signal_L, signal_R, bin_size=100, bins=None,
                  figsize=(10, 3), title='',
                  show_plot=True, fig=None, ax1=None, ax2=None,
                  savefig=None, save_dpi=300):
    """Plot the left and right flank signal of a cleavage site side by side.

    The last ``bins`` values of ``signal_L`` are drawn on the left panel and
    the first ``bins`` values of ``signal_R`` on the right panel, so the
    cleavage site sits where the two panels meet.  Both panels share the
    y-range ``[min(right), max(left)]``.

    Parameters
    ----------
    signal_L, signal_R
        Binned signal left and right of the cleavage site.  They must
        support slicing plus ``.max()`` / ``.min()``.
    bin_size : int
        Width of one bin; used to place one x tick every 10 kb.
    bins : int or None
        Number of bins shown per side; defaults to ``len(signal_L)``.
    figsize : tuple
        Size of the figure created when ``fig`` is None.
    title : str
        Text centred above the junction of the two panels.
    show_plot : bool
        Call ``plt.show()`` at the end.
    fig, ax1, ax2
        Existing figure and axes to draw on.  When ``fig`` is None a new
        figure with new axes is created and ``ax1``/``ax2`` are ignored.
        When ``fig`` is given, any axes left as None is created on it.
    savefig : str or None
        Path to save ``fig`` to.
    save_dpi : int
        Resolution of the saved file.

    Returns
    -------
    (fig, ax1, ax2)
    """
    if bins is None:
        bins = len(signal_L)
    y_max_L = signal_L[-bins:].max()
    y_min_R = signal_R[:bins].min()

    if fig is None:
        fig = plt.figure(figsize=figsize)
        # A new figure always gets its own axes, as before.
        ax1 = None
        ax2 = None
    # Create whichever axes the caller did not supply (previously a figure
    # passed without axes failed with AttributeError on None).
    if ax1 is None:
        ax1 = fig.add_axes([0.0, 0.1, 0.5, 0.8])
    if ax2 is None:
        ax2 = fig.add_axes([0.5, 0.1, 0.5, 0.8])

    # One tick every 10 kb; the labels are distances to the cleavage site.
    xtick_gap = 10000/bin_size
    n_xticks = int(np.ceil(bins/xtick_gap))
    xticks = np.arange(0,n_xticks+1)*xtick_gap
    xticks_label = np.arange(0,n_xticks+1)*10

    # plot left (the cleavage site is at the right edge, so labels are flipped)
    ax1.plot(range(bins), signal_L[-bins:], label='Original')
    ax1.plot([0,bins],[0,0],label='zero',color='black')
    ax1.set_ylim(y_min_R,y_max_L)
    ax1.set_xlim(-1,bins+1)
    ax1.set_xlabel('distance to cleavage site (kb)')
    ax1.set_xticks(xticks)
    _ = ax1.set_xticklabels([f'{x:g}' for x in np.flip(xticks_label)])
    ax1.set_ylabel('signal difference\n(coverage per 10M reads)')

    # plot right (the cleavage site is at the left edge)
    ax2.plot(range(bins), signal_R[:bins], label='Original')
    ax2.plot([0,bins],[0,0],label='zero',color='black')
    ax2.set_ylim(y_min_R,y_max_L)
    ax2.set_xlim(-1,bins+1)
    ax2.set_xlabel('distance to cleavage site (kb)')
    ax2.set_xticks(xticks)
    _ = ax2.set_xticklabels([f'{x:g}' for x in xticks_label])

    # The two panels touch, so the right one carries no y axis.
    ax2.set_yticks([])
    ax2.set_yticklabels([])
    ax2.set_ylabel('')

    # Hand-made title: text centred on the junction of the two panels
    ax2.text(0, y_max_L*1.1, title, ha='center', va='center')

    if savefig is not None:
        # Save the figure that was drawn on, which need not be pyplot's
        # current figure when ``fig`` was supplied by the caller.
        fig.savefig(savefig, dpi=save_dpi, bbox_inches='tight')

    if show_plot:
        plt.show()

    return fig, ax1, ax2
538
+
539
+
@@ -312,7 +312,7 @@ def target_signal_chunk(df_bdg_chr, df_alignment_chr, flank_max=100000, smooth_t
312
312
 
313
313
  # 2024.01.22. 额外写一个 signal length 算法,增加基于 pos_pct 而非 smooth 后的 overall_signal 的 length,叫 singal_length
314
314
  def signal_length(df_bdg_chr, chrom, cleavage_site, end='end',start='start',value='residual',
315
- flank_max=100000, binsize=100, pct_threshold=0.6):
315
+ flank_max=100000, binsize=100):
316
316
  # 输入数据必须是同一条染色体内的
317
317
  # Left
318
318
  df_bdg_chr_L = df_bdg_chr[ (df_bdg_chr[end] >= cleavage_site-flank_max) & (df_bdg_chr[end]<=cleavage_site) ].copy()
@@ -334,5 +334,6 @@ def signal_length(df_bdg_chr, chrom, cleavage_site, end='end',start='start',valu
334
334
  list_signal_pct_L = []
335
335
  list_pct_score_L = []
336
336
  list_signal_residual_L = []
337
+
337
338
 
338
339
  return list_return
@@ -0,0 +1,30 @@
1
+ __version__ = "2.7.10"
2
+ # 2023.08.11. v1.1.0 adding an option for not normalizing the bw file
3
+ # 2023.10.26. v1.9.0 prerelease for v2.0
4
+ # 2023.10.27. v2.0.0 major update, not fine-tuned yet
5
+ # 2023.10.28. v2.1.0 bug fixes; added signal length calculation
6
+ # 2023.10.28. v2.2.0 bug fixes; changed the signal length algorithm
7
+ # 2023.10.29. v2.3.0 added overall signal calculation
8
+ # 2023.11.01. v2.3.1 added the signal_only option
9
+ # 2023.11.02. v2.3.2 changed the order in which sample signal and group mean are computed
10
+ # 2023.11.04. v2.3.3 fixed wrong sorting when normalizing the overall score
11
+ # 2023.11.05. v2.3.4 fixed wrong column name selection when detecting one-sided overflow signal
12
+ # 2023.11.13. v2.3.5 fine-tuned track score
13
+ # 2023.12.05. v2.3.6 candidates: added cleavage site; fixed the misalignment bug when the alignment contains a deletion
14
+ # 2023.12.05. v2.3.7 use cleavage site instead of midpoint # not finished yet
15
+ # 2023.12.07. v2.3.8 df_score: added separate columns for df_exp and df_ctr. Fixed a bug when df_ctr is absent. track score uses proximal
16
+ # 2023.12.09. v2.4.0 to balance proximal and overall, add a bonus from the overall signal when the normalized overall signal is above 2
17
+ # 2023.12.09. v2.5.0 trying new weighting positions
18
+ # 2023.12.10. v2.6.0 added the trackseq v4 calculation branch, i.e. taking positive_pct within the region into account to avoid short, sharp signals
19
+ # 2023.12.10. v2.6.1 some non-specific signals have large values; if they are large negative numbers in the control group, subtracting the control may create falsely high signal, so negative values are clipped
20
+ # 2023.12.30. v2.7.0 added the X_offplot module for plotting
21
+ # 2023.12.31. v2.7.1 clip for negative control values changed from -5 to -1 to further reduce false positives. Also, overall is no longer added
22
+ # 2024.01.01. v2.7.2 weights changed to proximal + pct = 1 + 1. The criterion guarding against signal-overflow false positives changed from <0 to <=0
23
+ # 2024.01.02. v2.7.3 default flank regions changed to 1000 2000 3000 5000. Previously the negative control clip was effectively applied on the final score; now each value is clipped individually before the score is recomputed, default CtrClip=-0.5
24
+ # 2024.01.03. v2.7.4 updated blacklist.bed
25
+ # 2024.01.04. v2.7.5 updated hg38 blacklist.bed
26
+ # 2024.01.12. v2.7.6 fixed a minor bug; output fdr changed to <0.05.
27
+ # 2024.01.23. v2.7.7 Snakefile_offtracker: add --fixedStep to bigwigCompare for not merging neighbouring bins with equal values.
28
+ # 2024.02.01. v2.7.8 gradually adding X_offplot.py features
29
+ # 2024.06.02. v2.7.9 added offtracker_plot.py
30
+ # 2024.06.03. v2.7.10 bug fixes; offtable gains a dividing line for threshold = 2