offtracker 2.10.2__zip → 2.10.4__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {offtracker-2.10.2/offtracker.egg-info → offtracker-2.10.4}/PKG-INFO +2 -2
- {offtracker-2.10.2 → offtracker-2.10.4}/README.md +1 -1
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/X_offplot.py +0 -125
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/X_offtracker.py +0 -29
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/_version.py +2 -3
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/snakefile/Snakefile_offtracker.smk +16 -16
- {offtracker-2.10.2 → offtracker-2.10.4/offtracker.egg-info}/PKG-INFO +2 -2
- {offtracker-2.10.2 → offtracker-2.10.4}/scripts/offtracker_analysis.py +9 -9
- {offtracker-2.10.2 → offtracker-2.10.4}/scripts/offtracker_config.py +1 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/scripts/offtracker_qc.py +1 -3
- {offtracker-2.10.2 → offtracker-2.10.4}/LICENSE.txt +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/MANIFEST.in +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/X_sequence.py +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/__init__.py +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/snakefile/Snakefile_QC.smk +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/1.1_bed2fr.py +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/1.3_bdg_normalize_v4.0.py +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/bedGraphToBigWig +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/hg38.chrom.sizes +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/mm10.chrom.sizes +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/offtracker_blacklist_hg38.merged.bed +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/offtracker_blacklist_mm10.merged.bed +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker.egg-info/SOURCES.txt +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker.egg-info/dependency_links.txt +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker.egg-info/requires.txt +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/offtracker.egg-info/top_level.txt +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/scripts/offtracker_candidates.py +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/scripts/offtracker_plot.py +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/setup.cfg +0 -0
- {offtracker-2.10.2 → offtracker-2.10.4}/setup.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: offtracker
|
3
|
-
Version: 2.10.2
|
3
|
+
Version: 2.10.4
|
4
4
|
Summary: Tracking-seq data analysis
|
5
5
|
Home-page: https://github.com/Lan-lab/offtracker
|
6
6
|
Author: Runda Xu
|
@@ -25,7 +25,7 @@ Offtracker is an end to end pipeline of Tracking-seq data analysis for detecting
|
|
25
25
|
# We recommend creating a new environment using mamba/conda to avoid compatibility problems
|
26
26
|
# If you don't use mamba, just replace the code with conda
|
27
27
|
# Windows systems may not be compatible with pybedtools.
|
28
|
-
mamba create -n offtracker -c bioconda blast snakemake pybedtools chromap
|
28
|
+
mamba create -n offtracker -c bioconda blast snakemake pybedtools deeptools chromap
|
29
29
|
```
|
30
30
|
|
31
31
|
|
@@ -13,7 +13,7 @@ Offtracker is an end to end pipeline of Tracking-seq data analysis for detecting
|
|
13
13
|
# We recommend creating a new environment using mamba/conda to avoid compatibility problems
|
14
14
|
# If you don't use mamba, just replace the code with conda
|
15
15
|
# Windows systems may not be compatible with pybedtools.
|
16
|
-
mamba create -n offtracker -c bioconda blast snakemake pybedtools chromap
|
16
|
+
mamba create -n offtracker -c bioconda blast snakemake pybedtools deeptools chromap
|
17
17
|
```
|
18
18
|
|
19
19
|
|
@@ -349,131 +349,6 @@ def igv_single(location, file, fig=None, track_name='', track_name_loc='left',
|
|
349
349
|
return fig, track_position
|
350
350
|
|
351
351
|
|
352
|
-
from statsmodels.nonparametric.smoothers_lowess import lowess
|
353
|
-
def signal_length(df_bdg_chr, chrom, cleavage_site, end='end',start='start',value='residual',
|
354
|
-
flank_max=100000, bin_size=100, window_size=3000,signal_threshold = 0.3, show_plot=False, savefig=None, save_dpi=100):
|
355
|
-
df_bdg_chr = df_bdg_chr[df_bdg_chr['chr']==chrom]
|
356
|
-
## left
|
357
|
-
# 取 cleavage_site 附近的数据
|
358
|
-
df_bdg_chr_L = df_bdg_chr[ (df_bdg_chr[end] >= cleavage_site-flank_max) & (df_bdg_chr[end]<=cleavage_site) ].copy()
|
359
|
-
y_L = df_bdg_chr_L[value]
|
360
|
-
n_bins_L = len(y_L)
|
361
|
-
x_L = np.arange(n_bins_L)
|
362
|
-
bins=n_bins_L ## 和 right 公用
|
363
|
-
# 用 window_size 做临近
|
364
|
-
frac = window_size/(bins*bin_size)
|
365
|
-
lowess_smoothed_L = lowess(y_L[-bins:], x_L[-bins:], frac=frac)
|
366
|
-
lowess_smoothed_L = lowess(lowess_smoothed_L[:, 1], lowess_smoothed_L[:, 0], frac=frac)
|
367
|
-
# 得到最后一个 <signal_threshold 的 index
|
368
|
-
bool_L = lowess_smoothed_L[:,1]<signal_threshold
|
369
|
-
index_L = np.where(bool_L)[0][-1]
|
370
|
-
if index_L == (bins-1):
|
371
|
-
# 可能是单边信号,数值反向
|
372
|
-
lowess_smoothed_L_reverse = -lowess_smoothed_L[:,1]
|
373
|
-
bool_L = lowess_smoothed_L_reverse<signal_threshold
|
374
|
-
index_L = np.where(bool_L)[0][-1]
|
375
|
-
# 考虑到 smooth,所以长度 + 1
|
376
|
-
index_L = index_L - 1
|
377
|
-
signal_L = lowess_smoothed_L[index_L+1:,1]
|
378
|
-
length_L = (len(signal_L)*bin_size)/1000
|
379
|
-
max_signal_L = y_L.max()
|
380
|
-
y_max_L = max_signal_L*1.2
|
381
|
-
left_region = chrom + ':' + df_bdg_chr_L.iloc[0,1].astype(str) + '-' + df_bdg_chr_L.iloc[-1,2].astype(str)
|
382
|
-
|
383
|
-
## right
|
384
|
-
# 取 cleavage_site 附近的数据
|
385
|
-
df_bdg_chr_R = df_bdg_chr[ (df_bdg_chr[start] <= cleavage_site+flank_max) & (df_bdg_chr[start]>=cleavage_site) ].copy()
|
386
|
-
y_R = df_bdg_chr_R[value]
|
387
|
-
n_bins_R = len(y_R)
|
388
|
-
x_R = np.arange(n_bins_R)
|
389
|
-
# 用 window_size 做临近
|
390
|
-
frac = window_size/(bins*bin_size)
|
391
|
-
lowess_smoothed_R = lowess(y_R[:bins], x_R[:bins], frac=frac)
|
392
|
-
lowess_smoothed_R = lowess(lowess_smoothed_R[:, 1], lowess_smoothed_R[:, 0], frac=frac)
|
393
|
-
# 得到第一个 >-signal_threshold 的 index
|
394
|
-
bool_R = lowess_smoothed_R[:,1]>-signal_threshold
|
395
|
-
index_R = np.where(bool_R)[0][0]
|
396
|
-
if index_R == 0:
|
397
|
-
# 可能是单边信号,数值反向
|
398
|
-
lowess_smoothed_R_reverse = -lowess_smoothed_R[:,1]
|
399
|
-
bool_R = lowess_smoothed_R_reverse>-signal_threshold
|
400
|
-
index_R = np.where(bool_R)[0][0]
|
401
|
-
# 考虑到 smooth,所以长度 + 1
|
402
|
-
index_R = index_R + 1
|
403
|
-
signal_R = lowess_smoothed_R[:index_R,1]
|
404
|
-
length_R = (len(signal_R)*bin_size)/1000
|
405
|
-
min_signal_R = y_R.min()
|
406
|
-
y_mim_R = min_signal_R*1.2
|
407
|
-
right_region = chrom + ':' + df_bdg_chr_R.iloc[0,1].astype(str) + '-' + df_bdg_chr_R.iloc[-1,2].astype(str)
|
408
|
-
|
409
|
-
if show_plot:
|
410
|
-
fig = plt.figure(figsize=(10, 3))
|
411
|
-
ax1 = fig.add_axes([0.0, 0.1, 0.5, 0.8])
|
412
|
-
ax2 = fig.add_axes([0.5, 0.1, 0.5, 0.8])
|
413
|
-
|
414
|
-
# plot left
|
415
|
-
ax1.plot(range(bins), y_L[-bins:], label='Original')
|
416
|
-
ax1.plot(range(bins), lowess_smoothed_L[-bins:, 1], label='LOWESS', color='red')
|
417
|
-
ax1.plot([0,bins],[0,0],label='zero',color='black')
|
418
|
-
ax1.plot([0,bins],[signal_threshold,signal_threshold],label='threshold_left',color='orange')
|
419
|
-
ax1.plot([0,bins],[-signal_threshold,-signal_threshold],label='threshold_right',color='orange')
|
420
|
-
ax1.plot([index_L+1,index_L+1],[y_mim_R,y_max_L],label='length cutoff',color='orange')
|
421
|
-
ax1.set_ylim(y_mim_R,y_max_L)
|
422
|
-
ax1.set_xlim(-1,bins+1)
|
423
|
-
ax1.set_xlabel('distance to cleavage site (kb)')
|
424
|
-
ax1.set_title(left_region)
|
425
|
-
|
426
|
-
# add xticks
|
427
|
-
xtick_gap = 10000/bin_size # 10kb
|
428
|
-
n_xticks = int(np.ceil(bins/xtick_gap))
|
429
|
-
xticks = np.arange(0,n_xticks+1)*xtick_gap
|
430
|
-
xticks_label = np.arange(0,n_xticks+1)*10
|
431
|
-
xticks_label = np.flip(xticks_label)
|
432
|
-
# add length cutoff into xticks
|
433
|
-
# # 不加到xticks,可能会和原来的重合,改用text
|
434
|
-
# xticks = np.append(xticks, index_L+1)
|
435
|
-
# xticks_label = np.append(xticks_label, length_L)
|
436
|
-
ax1.text(index_L-3, 3, f'{length_L:g} kb', ha='right', va='top')
|
437
|
-
ax1.set_xticks(xticks)
|
438
|
-
_ = ax1.set_xticklabels([f'{x:g}' for x in xticks_label])
|
439
|
-
ax1.set_ylabel('signal difference\n(coverage per 10M reads)')
|
440
|
-
|
441
|
-
# plot right
|
442
|
-
ax2.plot(range(bins), y_R[:bins], label='Original')
|
443
|
-
ax2.plot(range(bins), lowess_smoothed_R[:bins, 1], label='LOWESS', color='red')
|
444
|
-
ax2.plot([0,bins],[0,0],label='zero',color='black')
|
445
|
-
ax2.plot([0,bins],[signal_threshold,signal_threshold],label='threshold_left',color='orange')
|
446
|
-
ax2.plot([0,bins],[-signal_threshold,-signal_threshold],label='threshold_right',color='orange')
|
447
|
-
ax2.plot([index_R,index_R],[y_mim_R,y_max_L],label='length cutoff',color='orange')
|
448
|
-
ax2.set_ylim(y_mim_R,y_max_L)
|
449
|
-
ax2.set_xlim(-1,bins+1)
|
450
|
-
ax2.set_xlabel('distance to cleavage site (kb)')
|
451
|
-
ax2.set_title(right_region)
|
452
|
-
|
453
|
-
# add xticks
|
454
|
-
xtick_gap = 10000/bin_size # 10kb
|
455
|
-
n_xticks = int(np.ceil(bins/xtick_gap))
|
456
|
-
xticks = np.arange(0,n_xticks+1)*xtick_gap
|
457
|
-
xticks_label = np.arange(0,n_xticks+1)*10
|
458
|
-
# add length cutoff into xticks
|
459
|
-
# # 不加到xticks,可能会和原来的重合,改用text
|
460
|
-
# xticks = np.append(xticks, index_R)
|
461
|
-
# xticks_label = np.append(xticks_label, length_R)
|
462
|
-
ax2.text(index_R+4, -3, f'{length_R:g} kb', ha='left', va='bottom')
|
463
|
-
ax2.set_xticks(xticks)
|
464
|
-
_ = ax2.set_xticklabels([f'{x:g}' for x in xticks_label])
|
465
|
-
|
466
|
-
# 左右两个图紧贴
|
467
|
-
ax2.set_yticks([])
|
468
|
-
ax2.set_yticklabels([])
|
469
|
-
ax2.set_ylabel('')
|
470
|
-
if savefig is not None:
|
471
|
-
plt.savefig(savefig, dpi=save_dpi, bbox_inches='tight')
|
472
|
-
#fig.tight_layout()
|
473
|
-
plt.show()
|
474
|
-
return length_L, length_R, lowess_smoothed_L, lowess_smoothed_R, y_L, y_R
|
475
|
-
|
476
|
-
|
477
352
|
def tracking_plot(signal_L, signal_R, bin_size=100, bins=None,
|
478
353
|
figsize=(10, 3), title='',
|
479
354
|
show_plot=True, fig=None, ax1=None, ax2=None,
|
@@ -308,32 +308,3 @@ def target_signal_chunk(df_bdg_chr, df_alignment_chr, flank_max=100000, smooth_t
|
|
308
308
|
return df_result
|
309
309
|
|
310
310
|
|
311
|
-
|
312
|
-
|
313
|
-
# 2024.01.22. 额外写一个 signal length 算法,增加基于 pos_pct 而非 smooth 后的 overall_signal 的 length,叫 singal_length
|
314
|
-
def signal_length(df_bdg_chr, chrom, cleavage_site, end='end',start='start',value='residual',
|
315
|
-
flank_max=100000, binsize=100):
|
316
|
-
# 输入数据必须是同一条染色体内的
|
317
|
-
# Left
|
318
|
-
df_bdg_chr_L = df_bdg_chr[ (df_bdg_chr[end] >= cleavage_site-flank_max) & (df_bdg_chr[end]<=cleavage_site) ].copy()
|
319
|
-
|
320
|
-
# pos and neg
|
321
|
-
df_bdg_chr_L_flank_pos = df_bdg_chr_L_flank[df_bdg_chr_L_flank[value] > 0]
|
322
|
-
df_bdg_chr_L_flank_neg = df_bdg_chr_L_flank[df_bdg_chr_L_flank[value] <= 0]
|
323
|
-
n_pos_left = len(df_bdg_chr_L_flank_pos)
|
324
|
-
n_neg_left = len(df_bdg_chr_L_flank_neg)
|
325
|
-
# avoid zero
|
326
|
-
if n_pos_left == 0:
|
327
|
-
pos_pct_left = 0
|
328
|
-
else:
|
329
|
-
pos_pct_left = n_pos_left/(n_pos_left+n_neg_left)
|
330
|
-
|
331
|
-
|
332
|
-
df_bdg_chr_R = df_bdg_chr[ (df_bdg_chr[start] <= cleavage_site+flank_max) & (df_bdg_chr[start]>=cleavage_site) ].copy()
|
333
|
-
# list_signal_residual_L 数值和之前类似
|
334
|
-
list_signal_pct_L = []
|
335
|
-
list_pct_score_L = []
|
336
|
-
list_signal_residual_L = []
|
337
|
-
|
338
|
-
|
339
|
-
return list_return
|
@@ -1,4 +1,4 @@
|
|
1
|
-
__version__ = "2.10.2"
|
1
|
+
__version__ = "2.10.4"
|
2
2
|
# 2023.08.11. v1.1.0 adding a option for not normalizing the bw file
|
3
3
|
# 2023.10.26. v1.9.0 prerelease for v2.0
|
4
4
|
# 2023.10.27. v2.0.0 大更新,还没微调
|
@@ -33,5 +33,4 @@ __version__ = "2.10.2"
|
|
33
33
|
# 2025.04.25. v2.8.0 修复了 offtracker candidates 会把小写序列转换成 N 的 bug
|
34
34
|
# 2025.05.22. v2.9.0 翻新部分代码结构
|
35
35
|
# 2025.06.05. v2.10.0 增加了QC模块。保留了负数score的记录,并在plot时显示为红字。增加了 "--ignore_chr" 用于跳过common chr过滤。
|
36
|
-
# 2025.06.17. v2.10.
|
37
|
-
# 2025.06.17. v2.10.2 修复翻新代码结构导致的bug
|
36
|
+
# 2025.06.17. v2.10.4 修复翻新代码结构导致的bug
|
@@ -40,7 +40,7 @@ rule chromap:
|
|
40
40
|
R1=lambda w: _files_R1[w.sample],
|
41
41
|
R2=lambda w: _files_R2[w.sample]
|
42
42
|
threads:
|
43
|
-
|
43
|
+
_thread
|
44
44
|
params:
|
45
45
|
index=config["index"],
|
46
46
|
fasta=config["fasta"]
|
@@ -58,7 +58,7 @@ if config["blacklist"] != 'none':
|
|
58
58
|
input:
|
59
59
|
os.path.join(_output_dir,"{sample}.chromapx.bed")
|
60
60
|
threads:
|
61
|
-
|
61
|
+
_thread
|
62
62
|
params:
|
63
63
|
blacklist=config["blacklist"]
|
64
64
|
output:
|
@@ -70,7 +70,7 @@ if config["blacklist"] != 'none':
|
|
70
70
|
input:
|
71
71
|
os.path.join(_output_dir,"{sample}.filtered.bed")
|
72
72
|
threads:
|
73
|
-
|
73
|
+
_thread
|
74
74
|
params:
|
75
75
|
dir_script=config["utility_dir"],
|
76
76
|
ignore_chr=config["ignore_chr"],
|
@@ -84,7 +84,7 @@ else:
|
|
84
84
|
input:
|
85
85
|
os.path.join(_output_dir,"{sample}.chromapx.bed")
|
86
86
|
threads:
|
87
|
-
|
87
|
+
_thread
|
88
88
|
params:
|
89
89
|
dir_script=config["utility_dir"],
|
90
90
|
ignore_chr=config["ignore_chr"],
|
@@ -98,7 +98,7 @@ rule bed2bdg_fw:
|
|
98
98
|
input:
|
99
99
|
os.path.join(_output_dir,"{sample}.fw.bed")
|
100
100
|
threads:
|
101
|
-
|
101
|
+
_thread
|
102
102
|
params:
|
103
103
|
gl=config["genomelen"]
|
104
104
|
output:
|
@@ -110,7 +110,7 @@ rule bed2bdg_rv:
|
|
110
110
|
input:
|
111
111
|
os.path.join(_output_dir,"{sample}.rv.bed")
|
112
112
|
threads:
|
113
|
-
|
113
|
+
_thread
|
114
114
|
params:
|
115
115
|
gl=config["genomelen"]
|
116
116
|
output:
|
@@ -122,7 +122,7 @@ rule bdg_sort_fw:
|
|
122
122
|
input:
|
123
123
|
fw=os.path.join(_output_dir,"{sample}.fw.bdg")
|
124
124
|
threads:
|
125
|
-
|
125
|
+
_thread
|
126
126
|
output:
|
127
127
|
temp(os.path.join(_output_dir,"{sample}.fw.sorted.bdg"))
|
128
128
|
shell:
|
@@ -132,7 +132,7 @@ rule bdg_sort_rv:
|
|
132
132
|
input:
|
133
133
|
rv=os.path.join(_output_dir,"{sample}.rv.bdg")
|
134
134
|
threads:
|
135
|
-
|
135
|
+
_thread
|
136
136
|
output:
|
137
137
|
temp(os.path.join(_output_dir,"{sample}.rv.sorted.bdg"))
|
138
138
|
shell:
|
@@ -144,7 +144,7 @@ if _normalize == "True":
|
|
144
144
|
bdg=os.path.join(_output_dir,"{sample}.fw.sorted.bdg"),
|
145
145
|
bed=os.path.join(_output_dir,"{sample}.fw.bed")
|
146
146
|
threads:
|
147
|
-
|
147
|
+
_thread
|
148
148
|
params:
|
149
149
|
dir_script=config["utility_dir"]
|
150
150
|
output:
|
@@ -157,7 +157,7 @@ if _normalize == "True":
|
|
157
157
|
bdg=os.path.join(_output_dir,"{sample}.rv.sorted.bdg"),
|
158
158
|
bed=os.path.join(_output_dir,"{sample}.rv.bed")
|
159
159
|
threads:
|
160
|
-
|
160
|
+
_thread
|
161
161
|
params:
|
162
162
|
dir_script=config["utility_dir"]
|
163
163
|
output:
|
@@ -169,7 +169,7 @@ if _normalize == "True":
|
|
169
169
|
input:
|
170
170
|
os.path.join(_output_dir,"{sample}.fw.scaled.bdg")
|
171
171
|
threads:
|
172
|
-
|
172
|
+
_thread
|
173
173
|
params:
|
174
174
|
gl=config["genomelen"],
|
175
175
|
dir_script=config["utility_dir"]
|
@@ -182,7 +182,7 @@ if _normalize == "True":
|
|
182
182
|
input:
|
183
183
|
os.path.join(_output_dir,"{sample}.rv.scaled.bdg")
|
184
184
|
threads:
|
185
|
-
|
185
|
+
_thread
|
186
186
|
params:
|
187
187
|
gl=config["genomelen"],
|
188
188
|
dir_script=config["utility_dir"]
|
@@ -196,7 +196,7 @@ if _normalize == "True":
|
|
196
196
|
fw=os.path.join(_output_dir,"{sample}.fw.scaled.bw"),
|
197
197
|
rv=os.path.join(_output_dir,"{sample}.rv.scaled.bw")
|
198
198
|
threads:
|
199
|
-
|
199
|
+
_thread
|
200
200
|
output:
|
201
201
|
os.path.join(_output_dir,"{sample}." + _BinSize + ".add.bdg")
|
202
202
|
shell:
|
@@ -212,7 +212,7 @@ else:
|
|
212
212
|
input:
|
213
213
|
os.path.join(_output_dir,"{sample}.rv.sorted.bdg")
|
214
214
|
threads:
|
215
|
-
|
215
|
+
_thread
|
216
216
|
output:
|
217
217
|
temp(os.path.join(_output_dir,"{sample}.rv.sorted_r.bdg"))
|
218
218
|
shell:
|
@@ -222,7 +222,7 @@ else:
|
|
222
222
|
input:
|
223
223
|
os.path.join(_output_dir,"{sample}.fw.sorted.bdg")
|
224
224
|
threads:
|
225
|
-
|
225
|
+
_thread
|
226
226
|
params:
|
227
227
|
gl=config["genomelen"],
|
228
228
|
dir_script=config["utility_dir"]
|
@@ -235,7 +235,7 @@ else:
|
|
235
235
|
input:
|
236
236
|
os.path.join(_output_dir,"{sample}.rv.sorted_r.bdg")
|
237
237
|
threads:
|
238
|
-
|
238
|
+
_thread
|
239
239
|
params:
|
240
240
|
gl=config["genomelen"],
|
241
241
|
dir_script=config["utility_dir"]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: offtracker
|
3
|
-
Version: 2.10.2
|
3
|
+
Version: 2.10.4
|
4
4
|
Summary: Tracking-seq data analysis
|
5
5
|
Home-page: https://github.com/Lan-lab/offtracker
|
6
6
|
Author: Runda Xu
|
@@ -25,7 +25,7 @@ Offtracker is an end to end pipeline of Tracking-seq data analysis for detecting
|
|
25
25
|
# We recommend creating a new environment using mamba/conda to avoid compatibility problems
|
26
26
|
# If you don't use mamba, just replace the code with conda
|
27
27
|
# Windows systems may not be compatible with pybedtools.
|
28
|
-
mamba create -n offtracker -c bioconda blast snakemake pybedtools chromap
|
28
|
+
mamba create -n offtracker -c bioconda blast snakemake pybedtools deeptools chromap
|
29
29
|
```
|
30
30
|
|
31
31
|
|
@@ -26,7 +26,7 @@ def main():
|
|
26
26
|
parser.add_argument('--name' , type=str, required=True, help='custom name of the sgRNA' )
|
27
27
|
parser.add_argument('--exp' , type=str, default='all', nargs='+', help='A substring mark in the name of experimental samples. The default is to use all samples other than control' )
|
28
28
|
parser.add_argument('--control' , type=str, default='none', nargs='+', help='A substring mark in the name of control samples. The default is no control. "others" for all samples other than --exp.' )
|
29
|
-
parser.add_argument('--fdr' , type=int, default=0.
|
29
|
+
parser.add_argument('--fdr' , type=int, default=0.01, help='FDR threshold for the final result. Default is 0.01.')
|
30
30
|
parser.add_argument('--score' , type=int, default=2, help='Track score threshold for the final result. Default is 2.')
|
31
31
|
parser.add_argument('--smooth' , type=int, default=1, help='Smooth strength for the signal.')
|
32
32
|
parser.add_argument('--window' , type=int, default=3, help='Window size for smoothing the signal.')
|
@@ -93,7 +93,7 @@ def main():
|
|
93
93
|
all_sample_files = []
|
94
94
|
for a_folder in folders:
|
95
95
|
bdg_files = pd.Series(glob.glob(os.path.join( a_folder, '*.add.bdg' ))).sort_values().reset_index(drop=True)
|
96
|
-
sample_names = bdg_files.apply(os.path.basename).str.extract('(.*)\.\d+\.add\.bdg',expand=False)
|
96
|
+
sample_names = bdg_files.apply(os.path.basename).str.extract(r'(.*)\.\d+\.add\.bdg',expand=False)
|
97
97
|
all_sample_names.extend( sample_names )
|
98
98
|
all_sample_files.extend( bdg_files )
|
99
99
|
all_sample_files = pd.Series(all_sample_files)
|
@@ -209,7 +209,7 @@ def main():
|
|
209
209
|
df_score = pd.read_csv(output, index_col=0)
|
210
210
|
else:
|
211
211
|
signal_files = pd.Series(glob.glob( os.path.join(outdir, 'temp', f'*{sgRNA_name}.signal.csv') ))
|
212
|
-
signal_names = signal_files.apply(os.path.basename).str.extract(
|
212
|
+
signal_names = signal_files.apply(os.path.basename).str.extract(rf'(.*)\.{sgRNA_name}\.signal\.csv',expand=False)
|
213
213
|
|
214
214
|
# 读取并合并 samples
|
215
215
|
list_df_exp_samples = []
|
@@ -287,7 +287,7 @@ def main():
|
|
287
287
|
# 整理表格
|
288
288
|
mean_seq_score = round(df_score['best_seq_score'].mean(),3)
|
289
289
|
df_score['norm_best_seq_score'] = np.power(df_score['best_seq_score']/mean_seq_score, seq_score_power)
|
290
|
-
df_score['final_score_1'] = df_score[
|
290
|
+
df_score['final_score_1'] = df_score['proximal_signal']*df_score['norm_best_seq_score']
|
291
291
|
df_score['final_score_2'] = df_score['pct_score']*df_score['norm_best_seq_score']
|
292
292
|
#df_score['final_score_2'] = df_score[f'overall_signal']*df_score['norm_best_seq_score']
|
293
293
|
df_score['raw_score'] = df_score['final_score_1'] + df_score['final_score_2']
|
@@ -303,10 +303,10 @@ def main():
|
|
303
303
|
score_bkg = df_result['raw_score'][n_outliers:-n_outliers]
|
304
304
|
mean_score_bkg = score_bkg.mean()
|
305
305
|
std_score_bkg = score_bkg.std()
|
306
|
-
df_result['track_score'] = (df_result[
|
307
|
-
df_result['track_score'] = df_result[
|
306
|
+
df_result['track_score'] = (df_result['raw_score'] - mean_score_bkg) / std_score_bkg
|
307
|
+
df_result['track_score'] = df_result['track_score']*target_std + 1
|
308
308
|
df_result = df_result.sort_values(by='track_score', ascending=False)
|
309
|
-
df_result['log2_track_score'] = np.log2(df_result[
|
309
|
+
df_result['log2_track_score'] = np.log2(df_result['track_score'].clip(lower=0.5))
|
310
310
|
|
311
311
|
# 单边信号周围有更高分的,去掉
|
312
312
|
# v2.1 后 cols_L, cols_R 要手动
|
@@ -345,7 +345,7 @@ def main():
|
|
345
345
|
mu, std = norm.fit(score_for_fitting)
|
346
346
|
print('mean_score:{:.3f};std:{:.3f}'.format(mu,std))
|
347
347
|
# pv and fdr
|
348
|
-
df_result['pv'] = df_result[
|
348
|
+
df_result['pv'] = df_result['log2_track_score'].apply( lambda x: norm.sf(x,loc=mu,scale=std) )
|
349
349
|
df_result['pv'] = df_result['pv'].clip(lower=1e-320)
|
350
350
|
df_result['fdr'] = offtracker.fdr(df_result['pv'])
|
351
351
|
df_result['rank'] = range(1,len(df_result)+1)
|
@@ -354,7 +354,7 @@ def main():
|
|
354
354
|
bool_fdr = df_result['fdr']<=fdr_thresh
|
355
355
|
bool_score = df_result['track_score']>=score_thresh
|
356
356
|
# 2025.06.05. BE可能会形成单边信号,导致 track_score 为负数,也保留
|
357
|
-
bool_neg_score = df_result['track_score']
|
357
|
+
bool_neg_score = df_result['track_score']<-0.5
|
358
358
|
df_output = df_result[bool_fdr|bool_score|bool_neg_score].copy()
|
359
359
|
if pattern_ctr != 'none':
|
360
360
|
df_output = df_output[['target_location', 'best_strand','best_target','deletion','insertion','mismatch',
|
@@ -13,6 +13,7 @@ import offtracker
|
|
13
13
|
import offtracker.X_sequence as xseq
|
14
14
|
script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
|
15
15
|
utility_dir = os.path.join(script_dir, 'utility')
|
16
|
+
os.chmod( os.path.join(utility_dir, 'bedGraphToBigWig'), 0o755)
|
16
17
|
|
17
18
|
###
|
18
19
|
parser = argparse.ArgumentParser()
|
@@ -21,7 +21,6 @@ parser.add_argument('-f','--folder', type=str, required=True, help='Direc
|
|
21
21
|
parser.add_argument('-o','--outdir', type=str, default='same', help='The output folder')
|
22
22
|
parser.add_argument('--subfolder' , type=int, default=0, help='subfolder level')
|
23
23
|
parser.add_argument('-t','--thread', type=int, default=8, help='Number of threads to be used')
|
24
|
-
parser.add_argument('--NGS_type' , type=str, default='paired-end', help='paired-end or single-end')
|
25
24
|
|
26
25
|
args = parser.parse_args()
|
27
26
|
|
@@ -35,7 +34,7 @@ else:
|
|
35
34
|
os.makedirs(args.outdir)
|
36
35
|
|
37
36
|
# 搜索 folder 的 n级子目录下的所有 fastq/fastq.gz/fq/fq.gz 文件
|
38
|
-
sample_names, files_R1, files_R2 = xseq.detect_fastq(args.folder, n_subfolder=args.subfolder
|
37
|
+
sample_names, files_R1, files_R2 = xseq.detect_fastq(args.folder, n_subfolder=args.subfolder)
|
39
38
|
|
40
39
|
assert not isinstance(sample_names, str), 'No fastq file is detected!'
|
41
40
|
|
@@ -43,7 +42,6 @@ dict_yaml = {
|
|
43
42
|
# fastq 信息
|
44
43
|
'files_R1':dict(zip(sample_names,files_R1)),
|
45
44
|
'files_R2':dict(zip(sample_names,files_R2)), # 单端 files_R2=[] 结果会自动为 {}
|
46
|
-
'NGS_type':args.NGS_type,
|
47
45
|
# 输入输出文件夹
|
48
46
|
'input_dir':args.folder,
|
49
47
|
'output_dir':args.outdir,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/offtracker_blacklist_hg38.merged.bed
RENAMED
File without changes
|
{offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/offtracker_blacklist_mm10.merged.bed
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|