offtracker 2.10.2__zip → 2.10.4__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {offtracker-2.10.2/offtracker.egg-info → offtracker-2.10.4}/PKG-INFO +2 -2
  2. {offtracker-2.10.2 → offtracker-2.10.4}/README.md +1 -1
  3. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/X_offplot.py +0 -125
  4. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/X_offtracker.py +0 -29
  5. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/_version.py +2 -3
  6. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/snakefile/Snakefile_offtracker.smk +16 -16
  7. {offtracker-2.10.2 → offtracker-2.10.4/offtracker.egg-info}/PKG-INFO +2 -2
  8. {offtracker-2.10.2 → offtracker-2.10.4}/scripts/offtracker_analysis.py +9 -9
  9. {offtracker-2.10.2 → offtracker-2.10.4}/scripts/offtracker_config.py +1 -0
  10. {offtracker-2.10.2 → offtracker-2.10.4}/scripts/offtracker_qc.py +1 -3
  11. {offtracker-2.10.2 → offtracker-2.10.4}/LICENSE.txt +0 -0
  12. {offtracker-2.10.2 → offtracker-2.10.4}/MANIFEST.in +0 -0
  13. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/X_sequence.py +0 -0
  14. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/__init__.py +0 -0
  15. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/snakefile/Snakefile_QC.smk +0 -0
  16. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/1.1_bed2fr.py +0 -0
  17. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/1.3_bdg_normalize_v4.0.py +0 -0
  18. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/bedGraphToBigWig +0 -0
  19. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/hg38.chrom.sizes +0 -0
  20. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/mm10.chrom.sizes +0 -0
  21. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/offtracker_blacklist_hg38.merged.bed +0 -0
  22. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker/utility/offtracker_blacklist_mm10.merged.bed +0 -0
  23. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker.egg-info/SOURCES.txt +0 -0
  24. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker.egg-info/dependency_links.txt +0 -0
  25. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker.egg-info/requires.txt +0 -0
  26. {offtracker-2.10.2 → offtracker-2.10.4}/offtracker.egg-info/top_level.txt +0 -0
  27. {offtracker-2.10.2 → offtracker-2.10.4}/scripts/offtracker_candidates.py +0 -0
  28. {offtracker-2.10.2 → offtracker-2.10.4}/scripts/offtracker_plot.py +0 -0
  29. {offtracker-2.10.2 → offtracker-2.10.4}/setup.cfg +0 -0
  30. {offtracker-2.10.2 → offtracker-2.10.4}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: offtracker
3
- Version: 2.10.2
3
+ Version: 2.10.4
4
4
  Summary: Tracking-seq data analysis
5
5
  Home-page: https://github.com/Lan-lab/offtracker
6
6
  Author: Runda Xu
@@ -25,7 +25,7 @@ Offtracker is an end to end pipeline of Tracking-seq data analysis for detecting
25
25
  # We recommend creating a new environment using mamba/conda to avoid compatibility problems
26
26
  # If you don't use mamba, just replace the code with conda
27
27
  # Windows systems may not be compatible with pybedtools.
28
- mamba create -n offtracker -c bioconda blast snakemake pybedtools chromap
28
+ mamba create -n offtracker -c bioconda blast snakemake pybedtools deeptools chromap
29
29
  ```
30
30
 
31
31
 
@@ -13,7 +13,7 @@ Offtracker is an end to end pipeline of Tracking-seq data analysis for detecting
13
13
  # We recommend creating a new environment using mamba/conda to avoid compatibility problems
14
14
  # If you don't use mamba, just replace the code with conda
15
15
  # Windows systems may not be compatible with pybedtools.
16
- mamba create -n offtracker -c bioconda blast snakemake pybedtools chromap
16
+ mamba create -n offtracker -c bioconda blast snakemake pybedtools deeptools chromap
17
17
  ```
18
18
 
19
19
 
@@ -349,131 +349,6 @@ def igv_single(location, file, fig=None, track_name='', track_name_loc='left',
349
349
  return fig, track_position
350
350
 
351
351
 
352
- from statsmodels.nonparametric.smoothers_lowess import lowess
353
- def signal_length(df_bdg_chr, chrom, cleavage_site, end='end',start='start',value='residual',
354
- flank_max=100000, bin_size=100, window_size=3000,signal_threshold = 0.3, show_plot=False, savefig=None, save_dpi=100):
355
- df_bdg_chr = df_bdg_chr[df_bdg_chr['chr']==chrom]
356
- ## left
357
- # 取 cleavage_site 附近的数据
358
- df_bdg_chr_L = df_bdg_chr[ (df_bdg_chr[end] >= cleavage_site-flank_max) & (df_bdg_chr[end]<=cleavage_site) ].copy()
359
- y_L = df_bdg_chr_L[value]
360
- n_bins_L = len(y_L)
361
- x_L = np.arange(n_bins_L)
362
- bins=n_bins_L ## 和 right 公用
363
- # 用 window_size 做临近
364
- frac = window_size/(bins*bin_size)
365
- lowess_smoothed_L = lowess(y_L[-bins:], x_L[-bins:], frac=frac)
366
- lowess_smoothed_L = lowess(lowess_smoothed_L[:, 1], lowess_smoothed_L[:, 0], frac=frac)
367
- # 得到最后一个 <signal_threshold 的 index
368
- bool_L = lowess_smoothed_L[:,1]<signal_threshold
369
- index_L = np.where(bool_L)[0][-1]
370
- if index_L == (bins-1):
371
- # 可能是单边信号,数值反向
372
- lowess_smoothed_L_reverse = -lowess_smoothed_L[:,1]
373
- bool_L = lowess_smoothed_L_reverse<signal_threshold
374
- index_L = np.where(bool_L)[0][-1]
375
- # 考虑到 smooth,所以长度 + 1
376
- index_L = index_L - 1
377
- signal_L = lowess_smoothed_L[index_L+1:,1]
378
- length_L = (len(signal_L)*bin_size)/1000
379
- max_signal_L = y_L.max()
380
- y_max_L = max_signal_L*1.2
381
- left_region = chrom + ':' + df_bdg_chr_L.iloc[0,1].astype(str) + '-' + df_bdg_chr_L.iloc[-1,2].astype(str)
382
-
383
- ## right
384
- # 取 cleavage_site 附近的数据
385
- df_bdg_chr_R = df_bdg_chr[ (df_bdg_chr[start] <= cleavage_site+flank_max) & (df_bdg_chr[start]>=cleavage_site) ].copy()
386
- y_R = df_bdg_chr_R[value]
387
- n_bins_R = len(y_R)
388
- x_R = np.arange(n_bins_R)
389
- # 用 window_size 做临近
390
- frac = window_size/(bins*bin_size)
391
- lowess_smoothed_R = lowess(y_R[:bins], x_R[:bins], frac=frac)
392
- lowess_smoothed_R = lowess(lowess_smoothed_R[:, 1], lowess_smoothed_R[:, 0], frac=frac)
393
- # 得到第一个 >-signal_threshold 的 index
394
- bool_R = lowess_smoothed_R[:,1]>-signal_threshold
395
- index_R = np.where(bool_R)[0][0]
396
- if index_R == 0:
397
- # 可能是单边信号,数值反向
398
- lowess_smoothed_R_reverse = -lowess_smoothed_R[:,1]
399
- bool_R = lowess_smoothed_R_reverse>-signal_threshold
400
- index_R = np.where(bool_R)[0][0]
401
- # 考虑到 smooth,所以长度 + 1
402
- index_R = index_R + 1
403
- signal_R = lowess_smoothed_R[:index_R,1]
404
- length_R = (len(signal_R)*bin_size)/1000
405
- min_signal_R = y_R.min()
406
- y_mim_R = min_signal_R*1.2
407
- right_region = chrom + ':' + df_bdg_chr_R.iloc[0,1].astype(str) + '-' + df_bdg_chr_R.iloc[-1,2].astype(str)
408
-
409
- if show_plot:
410
- fig = plt.figure(figsize=(10, 3))
411
- ax1 = fig.add_axes([0.0, 0.1, 0.5, 0.8])
412
- ax2 = fig.add_axes([0.5, 0.1, 0.5, 0.8])
413
-
414
- # plot left
415
- ax1.plot(range(bins), y_L[-bins:], label='Original')
416
- ax1.plot(range(bins), lowess_smoothed_L[-bins:, 1], label='LOWESS', color='red')
417
- ax1.plot([0,bins],[0,0],label='zero',color='black')
418
- ax1.plot([0,bins],[signal_threshold,signal_threshold],label='threshold_left',color='orange')
419
- ax1.plot([0,bins],[-signal_threshold,-signal_threshold],label='threshold_right',color='orange')
420
- ax1.plot([index_L+1,index_L+1],[y_mim_R,y_max_L],label='length cutoff',color='orange')
421
- ax1.set_ylim(y_mim_R,y_max_L)
422
- ax1.set_xlim(-1,bins+1)
423
- ax1.set_xlabel('distance to cleavage site (kb)')
424
- ax1.set_title(left_region)
425
-
426
- # add xticks
427
- xtick_gap = 10000/bin_size # 10kb
428
- n_xticks = int(np.ceil(bins/xtick_gap))
429
- xticks = np.arange(0,n_xticks+1)*xtick_gap
430
- xticks_label = np.arange(0,n_xticks+1)*10
431
- xticks_label = np.flip(xticks_label)
432
- # add length cutoff into xticks
433
- # # 不加到xticks,可能会和原来的重合,改用text
434
- # xticks = np.append(xticks, index_L+1)
435
- # xticks_label = np.append(xticks_label, length_L)
436
- ax1.text(index_L-3, 3, f'{length_L:g} kb', ha='right', va='top')
437
- ax1.set_xticks(xticks)
438
- _ = ax1.set_xticklabels([f'{x:g}' for x in xticks_label])
439
- ax1.set_ylabel('signal difference\n(coverage per 10M reads)')
440
-
441
- # plot right
442
- ax2.plot(range(bins), y_R[:bins], label='Original')
443
- ax2.plot(range(bins), lowess_smoothed_R[:bins, 1], label='LOWESS', color='red')
444
- ax2.plot([0,bins],[0,0],label='zero',color='black')
445
- ax2.plot([0,bins],[signal_threshold,signal_threshold],label='threshold_left',color='orange')
446
- ax2.plot([0,bins],[-signal_threshold,-signal_threshold],label='threshold_right',color='orange')
447
- ax2.plot([index_R,index_R],[y_mim_R,y_max_L],label='length cutoff',color='orange')
448
- ax2.set_ylim(y_mim_R,y_max_L)
449
- ax2.set_xlim(-1,bins+1)
450
- ax2.set_xlabel('distance to cleavage site (kb)')
451
- ax2.set_title(right_region)
452
-
453
- # add xticks
454
- xtick_gap = 10000/bin_size # 10kb
455
- n_xticks = int(np.ceil(bins/xtick_gap))
456
- xticks = np.arange(0,n_xticks+1)*xtick_gap
457
- xticks_label = np.arange(0,n_xticks+1)*10
458
- # add length cutoff into xticks
459
- # # 不加到xticks,可能会和原来的重合,改用text
460
- # xticks = np.append(xticks, index_R)
461
- # xticks_label = np.append(xticks_label, length_R)
462
- ax2.text(index_R+4, -3, f'{length_R:g} kb', ha='left', va='bottom')
463
- ax2.set_xticks(xticks)
464
- _ = ax2.set_xticklabels([f'{x:g}' for x in xticks_label])
465
-
466
- # 左右两个图紧贴
467
- ax2.set_yticks([])
468
- ax2.set_yticklabels([])
469
- ax2.set_ylabel('')
470
- if savefig is not None:
471
- plt.savefig(savefig, dpi=save_dpi, bbox_inches='tight')
472
- #fig.tight_layout()
473
- plt.show()
474
- return length_L, length_R, lowess_smoothed_L, lowess_smoothed_R, y_L, y_R
475
-
476
-
477
352
  def tracking_plot(signal_L, signal_R, bin_size=100, bins=None,
478
353
  figsize=(10, 3), title='',
479
354
  show_plot=True, fig=None, ax1=None, ax2=None,
@@ -308,32 +308,3 @@ def target_signal_chunk(df_bdg_chr, df_alignment_chr, flank_max=100000, smooth_t
308
308
  return df_result
309
309
 
310
310
 
311
-
312
-
313
- # 2024.01.22. 额外写一个 signal length 算法,增加基于 pos_pct 而非 smooth 后的 overall_signal 的 length,叫 singal_length
314
- def signal_length(df_bdg_chr, chrom, cleavage_site, end='end',start='start',value='residual',
315
- flank_max=100000, binsize=100):
316
- # 输入数据必须是同一条染色体内的
317
- # Left
318
- df_bdg_chr_L = df_bdg_chr[ (df_bdg_chr[end] >= cleavage_site-flank_max) & (df_bdg_chr[end]<=cleavage_site) ].copy()
319
-
320
- # pos and neg
321
- df_bdg_chr_L_flank_pos = df_bdg_chr_L_flank[df_bdg_chr_L_flank[value] > 0]
322
- df_bdg_chr_L_flank_neg = df_bdg_chr_L_flank[df_bdg_chr_L_flank[value] <= 0]
323
- n_pos_left = len(df_bdg_chr_L_flank_pos)
324
- n_neg_left = len(df_bdg_chr_L_flank_neg)
325
- # avoid zero
326
- if n_pos_left == 0:
327
- pos_pct_left = 0
328
- else:
329
- pos_pct_left = n_pos_left/(n_pos_left+n_neg_left)
330
-
331
-
332
- df_bdg_chr_R = df_bdg_chr[ (df_bdg_chr[start] <= cleavage_site+flank_max) & (df_bdg_chr[start]>=cleavage_site) ].copy()
333
- # list_signal_residual_L 数值和之前类似
334
- list_signal_pct_L = []
335
- list_pct_score_L = []
336
- list_signal_residual_L = []
337
-
338
-
339
- return list_return
@@ -1,4 +1,4 @@
1
- __version__ = "2.10.2"
1
+ __version__ = "2.10.4"
2
2
  # 2023.08.11. v1.1.0 adding a option for not normalizing the bw file
3
3
  # 2023.10.26. v1.9.0 prerelease for v2.0
4
4
  # 2023.10.27. v2.0.0 大更新,还没微调
@@ -33,5 +33,4 @@ __version__ = "2.10.2"
33
33
  # 2025.04.25. v2.8.0 修复了 offtracker candidates 会把小写序列转换成 N 的 bug
34
34
  # 2025.05.22. v2.9.0 翻新部分代码结构
35
35
  # 2025.06.05. v2.10.0 增加了QC模块。保留了负数score的记录,并在plot时显示为红字。增加了 "--ignore_chr" 用于跳过common chr过滤。
36
- # 2025.06.17. v2.10.1 修复翻新代码结构导致的bug
37
- # 2025.06.17. v2.10.2 修复翻新代码结构导致的bug
36
+ # 2025.06.17. v2.10.4 修复翻新代码结构导致的bug
@@ -40,7 +40,7 @@ rule chromap:
40
40
  R1=lambda w: _files_R1[w.sample],
41
41
  R2=lambda w: _files_R2[w.sample]
42
42
  threads:
43
- _threads
43
+ _thread
44
44
  params:
45
45
  index=config["index"],
46
46
  fasta=config["fasta"]
@@ -58,7 +58,7 @@ if config["blacklist"] != 'none':
58
58
  input:
59
59
  os.path.join(_output_dir,"{sample}.chromapx.bed")
60
60
  threads:
61
- _threads
61
+ _thread
62
62
  params:
63
63
  blacklist=config["blacklist"]
64
64
  output:
@@ -70,7 +70,7 @@ if config["blacklist"] != 'none':
70
70
  input:
71
71
  os.path.join(_output_dir,"{sample}.filtered.bed")
72
72
  threads:
73
- _threads
73
+ _thread
74
74
  params:
75
75
  dir_script=config["utility_dir"],
76
76
  ignore_chr=config["ignore_chr"],
@@ -84,7 +84,7 @@ else:
84
84
  input:
85
85
  os.path.join(_output_dir,"{sample}.chromapx.bed")
86
86
  threads:
87
- _threads
87
+ _thread
88
88
  params:
89
89
  dir_script=config["utility_dir"],
90
90
  ignore_chr=config["ignore_chr"],
@@ -98,7 +98,7 @@ rule bed2bdg_fw:
98
98
  input:
99
99
  os.path.join(_output_dir,"{sample}.fw.bed")
100
100
  threads:
101
- _threads
101
+ _thread
102
102
  params:
103
103
  gl=config["genomelen"]
104
104
  output:
@@ -110,7 +110,7 @@ rule bed2bdg_rv:
110
110
  input:
111
111
  os.path.join(_output_dir,"{sample}.rv.bed")
112
112
  threads:
113
- _threads
113
+ _thread
114
114
  params:
115
115
  gl=config["genomelen"]
116
116
  output:
@@ -122,7 +122,7 @@ rule bdg_sort_fw:
122
122
  input:
123
123
  fw=os.path.join(_output_dir,"{sample}.fw.bdg")
124
124
  threads:
125
- _threads
125
+ _thread
126
126
  output:
127
127
  temp(os.path.join(_output_dir,"{sample}.fw.sorted.bdg"))
128
128
  shell:
@@ -132,7 +132,7 @@ rule bdg_sort_rv:
132
132
  input:
133
133
  rv=os.path.join(_output_dir,"{sample}.rv.bdg")
134
134
  threads:
135
- _threads
135
+ _thread
136
136
  output:
137
137
  temp(os.path.join(_output_dir,"{sample}.rv.sorted.bdg"))
138
138
  shell:
@@ -144,7 +144,7 @@ if _normalize == "True":
144
144
  bdg=os.path.join(_output_dir,"{sample}.fw.sorted.bdg"),
145
145
  bed=os.path.join(_output_dir,"{sample}.fw.bed")
146
146
  threads:
147
- _threads
147
+ _thread
148
148
  params:
149
149
  dir_script=config["utility_dir"]
150
150
  output:
@@ -157,7 +157,7 @@ if _normalize == "True":
157
157
  bdg=os.path.join(_output_dir,"{sample}.rv.sorted.bdg"),
158
158
  bed=os.path.join(_output_dir,"{sample}.rv.bed")
159
159
  threads:
160
- _threads
160
+ _thread
161
161
  params:
162
162
  dir_script=config["utility_dir"]
163
163
  output:
@@ -169,7 +169,7 @@ if _normalize == "True":
169
169
  input:
170
170
  os.path.join(_output_dir,"{sample}.fw.scaled.bdg")
171
171
  threads:
172
- _threads
172
+ _thread
173
173
  params:
174
174
  gl=config["genomelen"],
175
175
  dir_script=config["utility_dir"]
@@ -182,7 +182,7 @@ if _normalize == "True":
182
182
  input:
183
183
  os.path.join(_output_dir,"{sample}.rv.scaled.bdg")
184
184
  threads:
185
- _threads
185
+ _thread
186
186
  params:
187
187
  gl=config["genomelen"],
188
188
  dir_script=config["utility_dir"]
@@ -196,7 +196,7 @@ if _normalize == "True":
196
196
  fw=os.path.join(_output_dir,"{sample}.fw.scaled.bw"),
197
197
  rv=os.path.join(_output_dir,"{sample}.rv.scaled.bw")
198
198
  threads:
199
- _threads
199
+ _thread
200
200
  output:
201
201
  os.path.join(_output_dir,"{sample}." + _BinSize + ".add.bdg")
202
202
  shell:
@@ -212,7 +212,7 @@ else:
212
212
  input:
213
213
  os.path.join(_output_dir,"{sample}.rv.sorted.bdg")
214
214
  threads:
215
- _threads
215
+ _thread
216
216
  output:
217
217
  temp(os.path.join(_output_dir,"{sample}.rv.sorted_r.bdg"))
218
218
  shell:
@@ -222,7 +222,7 @@ else:
222
222
  input:
223
223
  os.path.join(_output_dir,"{sample}.fw.sorted.bdg")
224
224
  threads:
225
- _threads
225
+ _thread
226
226
  params:
227
227
  gl=config["genomelen"],
228
228
  dir_script=config["utility_dir"]
@@ -235,7 +235,7 @@ else:
235
235
  input:
236
236
  os.path.join(_output_dir,"{sample}.rv.sorted_r.bdg")
237
237
  threads:
238
- _threads
238
+ _thread
239
239
  params:
240
240
  gl=config["genomelen"],
241
241
  dir_script=config["utility_dir"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: offtracker
3
- Version: 2.10.2
3
+ Version: 2.10.4
4
4
  Summary: Tracking-seq data analysis
5
5
  Home-page: https://github.com/Lan-lab/offtracker
6
6
  Author: Runda Xu
@@ -25,7 +25,7 @@ Offtracker is an end to end pipeline of Tracking-seq data analysis for detecting
25
25
  # We recommend creating a new environment using mamba/conda to avoid compatibility problems
26
26
  # If you don't use mamba, just replace the code with conda
27
27
  # Windows systems may not be compatible with pybedtools.
28
- mamba create -n offtracker -c bioconda blast snakemake pybedtools chromap
28
+ mamba create -n offtracker -c bioconda blast snakemake pybedtools deeptools chromap
29
29
  ```
30
30
 
31
31
 
@@ -26,7 +26,7 @@ def main():
26
26
  parser.add_argument('--name' , type=str, required=True, help='custom name of the sgRNA' )
27
27
  parser.add_argument('--exp' , type=str, default='all', nargs='+', help='A substring mark in the name of experimental samples. The default is to use all samples other than control' )
28
28
  parser.add_argument('--control' , type=str, default='none', nargs='+', help='A substring mark in the name of control samples. The default is no control. "others" for all samples other than --exp.' )
29
- parser.add_argument('--fdr' , type=int, default=0.05, help='FDR threshold for the final result. Default is 0.05.')
29
+ parser.add_argument('--fdr' , type=float, default=0.01, help='FDR threshold for the final result. Default is 0.01.')  # FDR is a fraction in [0, 1]; type=int would reject any value given on the command line, e.g. "--fdr 0.05"
30
30
  parser.add_argument('--score' , type=int, default=2, help='Track score threshold for the final result. Default is 2.')
31
31
  parser.add_argument('--smooth' , type=int, default=1, help='Smooth strength for the signal.')
32
32
  parser.add_argument('--window' , type=int, default=3, help='Window size for smoothing the signal.')
@@ -93,7 +93,7 @@ def main():
93
93
  all_sample_files = []
94
94
  for a_folder in folders:
95
95
  bdg_files = pd.Series(glob.glob(os.path.join( a_folder, '*.add.bdg' ))).sort_values().reset_index(drop=True)
96
- sample_names = bdg_files.apply(os.path.basename).str.extract('(.*)\.\d+\.add\.bdg',expand=False)
96
+ sample_names = bdg_files.apply(os.path.basename).str.extract(r'(.*)\.\d+\.add\.bdg',expand=False)
97
97
  all_sample_names.extend( sample_names )
98
98
  all_sample_files.extend( bdg_files )
99
99
  all_sample_files = pd.Series(all_sample_files)
@@ -209,7 +209,7 @@ def main():
209
209
  df_score = pd.read_csv(output, index_col=0)
210
210
  else:
211
211
  signal_files = pd.Series(glob.glob( os.path.join(outdir, 'temp', f'*{sgRNA_name}.signal.csv') ))
212
- signal_names = signal_files.apply(os.path.basename).str.extract(f'(.*)\.{sgRNA_name}\.signal\.csv',expand=False)
212
+ signal_names = signal_files.apply(os.path.basename).str.extract(rf'(.*)\.{sgRNA_name}\.signal\.csv',expand=False)
213
213
 
214
214
  # 读取并合并 samples
215
215
  list_df_exp_samples = []
@@ -287,7 +287,7 @@ def main():
287
287
  # 整理表格
288
288
  mean_seq_score = round(df_score['best_seq_score'].mean(),3)
289
289
  df_score['norm_best_seq_score'] = np.power(df_score['best_seq_score']/mean_seq_score, seq_score_power)
290
- df_score['final_score_1'] = df_score[f'proximal_signal']*df_score['norm_best_seq_score']
290
+ df_score['final_score_1'] = df_score['proximal_signal']*df_score['norm_best_seq_score']
291
291
  df_score['final_score_2'] = df_score['pct_score']*df_score['norm_best_seq_score']
292
292
  #df_score['final_score_2'] = df_score[f'overall_signal']*df_score['norm_best_seq_score']
293
293
  df_score['raw_score'] = df_score['final_score_1'] + df_score['final_score_2']
@@ -303,10 +303,10 @@ def main():
303
303
  score_bkg = df_result['raw_score'][n_outliers:-n_outliers]
304
304
  mean_score_bkg = score_bkg.mean()
305
305
  std_score_bkg = score_bkg.std()
306
- df_result['track_score'] = (df_result[f'raw_score'] - mean_score_bkg) / std_score_bkg
307
- df_result['track_score'] = df_result[f'track_score']*target_std + 1
306
+ df_result['track_score'] = (df_result['raw_score'] - mean_score_bkg) / std_score_bkg
307
+ df_result['track_score'] = df_result['track_score']*target_std + 1
308
308
  df_result = df_result.sort_values(by='track_score', ascending=False)
309
- df_result['log2_track_score'] = np.log2(df_result[f'track_score'].clip(lower=0.5))
309
+ df_result['log2_track_score'] = np.log2(df_result['track_score'].clip(lower=0.5))
310
310
 
311
311
  # 单边信号周围有更高分的,去掉
312
312
  # v2.1 后 cols_L, cols_R 要手动
@@ -345,7 +345,7 @@ def main():
345
345
  mu, std = norm.fit(score_for_fitting)
346
346
  print('mean_score:{:.3f};std:{:.3f}'.format(mu,std))
347
347
  # pv and fdr
348
- df_result['pv'] = df_result[f'log2_track_score'].apply( lambda x: norm.sf(x,loc=mu,scale=std) )
348
+ df_result['pv'] = df_result['log2_track_score'].apply( lambda x: norm.sf(x,loc=mu,scale=std) )
349
349
  df_result['pv'] = df_result['pv'].clip(lower=1e-320)
350
350
  df_result['fdr'] = offtracker.fdr(df_result['pv'])
351
351
  df_result['rank'] = range(1,len(df_result)+1)
@@ -354,7 +354,7 @@ def main():
354
354
  bool_fdr = df_result['fdr']<=fdr_thresh
355
355
  bool_score = df_result['track_score']>=score_thresh
356
356
  # 2025.06.05. BE可能会形成单边信号,导致 track_score 为负数,也保留
357
- bool_neg_score = df_result['track_score']<0
357
+ bool_neg_score = df_result['track_score']<-0.5
358
358
  df_output = df_result[bool_fdr|bool_score|bool_neg_score].copy()
359
359
  if pattern_ctr != 'none':
360
360
  df_output = df_output[['target_location', 'best_strand','best_target','deletion','insertion','mismatch',
@@ -13,6 +13,7 @@ import offtracker
13
13
  import offtracker.X_sequence as xseq
14
14
  script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
15
15
  utility_dir = os.path.join(script_dir, 'utility')
16
+ os.chmod( os.path.join(utility_dir, 'bedGraphToBigWig'), 0o755)
16
17
 
17
18
  ###
18
19
  parser = argparse.ArgumentParser()
@@ -21,7 +21,6 @@ parser.add_argument('-f','--folder', type=str, required=True, help='Direc
21
21
  parser.add_argument('-o','--outdir', type=str, default='same', help='The output folder')
22
22
  parser.add_argument('--subfolder' , type=int, default=0, help='subfolder level')
23
23
  parser.add_argument('-t','--thread', type=int, default=8, help='Number of threads to be used')
24
- parser.add_argument('--NGS_type' , type=str, default='paired-end', help='paired-end or single-end')
25
24
 
26
25
  args = parser.parse_args()
27
26
 
@@ -35,7 +34,7 @@ else:
35
34
  os.makedirs(args.outdir)
36
35
 
37
36
  # 搜索 folder 的 n级子目录下的所有 fastq/fastq.gz/fq/fq.gz 文件
38
- sample_names, files_R1, files_R2 = xseq.detect_fastq(args.folder, n_subfolder=args.subfolder, NGS_type=args.NGS_type)
37
+ sample_names, files_R1, files_R2 = xseq.detect_fastq(args.folder, n_subfolder=args.subfolder)
39
38
 
40
39
  assert not isinstance(sample_names, str), 'No fastq file is detected!'
41
40
 
@@ -43,7 +42,6 @@ dict_yaml = {
43
42
  # fastq 信息
44
43
  'files_R1':dict(zip(sample_names,files_R1)),
45
44
  'files_R2':dict(zip(sample_names,files_R2)), # 单端 files_R2=[] 结果会自动为 {}
46
- 'NGS_type':args.NGS_type,
47
45
  # 输入输出文件夹
48
46
  'input_dir':args.folder,
49
47
  'output_dir':args.outdir,
File without changes
File without changes
File without changes
File without changes