offtracker 2.10.3__zip → 2.10.4__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {offtracker-2.10.3/offtracker.egg-info → offtracker-2.10.4}/PKG-INFO +2 -2
  2. {offtracker-2.10.3 → offtracker-2.10.4}/README.md +1 -1
  3. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/X_offplot.py +0 -125
  4. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/X_offtracker.py +0 -29
  5. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/_version.py +2 -2
  6. {offtracker-2.10.3 → offtracker-2.10.4/offtracker.egg-info}/PKG-INFO +2 -2
  7. {offtracker-2.10.3 → offtracker-2.10.4}/scripts/offtracker_analysis.py +9 -9
  8. {offtracker-2.10.3 → offtracker-2.10.4}/scripts/offtracker_config.py +1 -0
  9. {offtracker-2.10.3 → offtracker-2.10.4}/LICENSE.txt +0 -0
  10. {offtracker-2.10.3 → offtracker-2.10.4}/MANIFEST.in +0 -0
  11. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/X_sequence.py +0 -0
  12. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/__init__.py +0 -0
  13. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/snakefile/Snakefile_QC.smk +0 -0
  14. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/snakefile/Snakefile_offtracker.smk +0 -0
  15. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/utility/1.1_bed2fr.py +0 -0
  16. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/utility/1.3_bdg_normalize_v4.0.py +0 -0
  17. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/utility/bedGraphToBigWig +0 -0
  18. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/utility/hg38.chrom.sizes +0 -0
  19. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/utility/mm10.chrom.sizes +0 -0
  20. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/utility/offtracker_blacklist_hg38.merged.bed +0 -0
  21. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker/utility/offtracker_blacklist_mm10.merged.bed +0 -0
  22. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker.egg-info/SOURCES.txt +0 -0
  23. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker.egg-info/dependency_links.txt +0 -0
  24. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker.egg-info/requires.txt +0 -0
  25. {offtracker-2.10.3 → offtracker-2.10.4}/offtracker.egg-info/top_level.txt +0 -0
  26. {offtracker-2.10.3 → offtracker-2.10.4}/scripts/offtracker_candidates.py +0 -0
  27. {offtracker-2.10.3 → offtracker-2.10.4}/scripts/offtracker_plot.py +0 -0
  28. {offtracker-2.10.3 → offtracker-2.10.4}/scripts/offtracker_qc.py +0 -0
  29. {offtracker-2.10.3 → offtracker-2.10.4}/setup.cfg +0 -0
  30. {offtracker-2.10.3 → offtracker-2.10.4}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: offtracker
3
- Version: 2.10.3
3
+ Version: 2.10.4
4
4
  Summary: Tracking-seq data analysis
5
5
  Home-page: https://github.com/Lan-lab/offtracker
6
6
  Author: Runda Xu
@@ -25,7 +25,7 @@ Offtracker is an end to end pipeline of Tracking-seq data analysis for detecting
25
25
  # We recommend creating a new environment using mamba/conda to avoid compatibility problems
26
26
  # If you don't use mamba, just replace the code with conda
27
27
  # Windows systems may not be compatible with pybedtools.
28
- mamba create -n offtracker -c bioconda blast snakemake pybedtools chromap
28
+ mamba create -n offtracker -c bioconda blast snakemake pybedtools deeptools chromap
29
29
  ```
30
30
 
31
31
 
@@ -13,7 +13,7 @@ Offtracker is an end to end pipeline of Tracking-seq data analysis for detecting
13
13
  # We recommend creating a new environment using mamba/conda to avoid compatibility problems
14
14
  # If you don't use mamba, just replace the code with conda
15
15
  # Windows systems may not be compatible with pybedtools.
16
- mamba create -n offtracker -c bioconda blast snakemake pybedtools chromap
16
+ mamba create -n offtracker -c bioconda blast snakemake pybedtools deeptools chromap
17
17
  ```
18
18
 
19
19
 
@@ -349,131 +349,6 @@ def igv_single(location, file, fig=None, track_name='', track_name_loc='left',
349
349
  return fig, track_position
350
350
 
351
351
 
352
- from statsmodels.nonparametric.smoothers_lowess import lowess
353
- def signal_length(df_bdg_chr, chrom, cleavage_site, end='end',start='start',value='residual',
354
- flank_max=100000, bin_size=100, window_size=3000,signal_threshold = 0.3, show_plot=False, savefig=None, save_dpi=100):
355
- df_bdg_chr = df_bdg_chr[df_bdg_chr['chr']==chrom]
356
- ## left
357
- # 取 cleavage_site 附近的数据
358
- df_bdg_chr_L = df_bdg_chr[ (df_bdg_chr[end] >= cleavage_site-flank_max) & (df_bdg_chr[end]<=cleavage_site) ].copy()
359
- y_L = df_bdg_chr_L[value]
360
- n_bins_L = len(y_L)
361
- x_L = np.arange(n_bins_L)
362
- bins=n_bins_L ## 和 right 公用
363
- # 用 window_size 做临近
364
- frac = window_size/(bins*bin_size)
365
- lowess_smoothed_L = lowess(y_L[-bins:], x_L[-bins:], frac=frac)
366
- lowess_smoothed_L = lowess(lowess_smoothed_L[:, 1], lowess_smoothed_L[:, 0], frac=frac)
367
- # 得到最后一个 <signal_threshold 的 index
368
- bool_L = lowess_smoothed_L[:,1]<signal_threshold
369
- index_L = np.where(bool_L)[0][-1]
370
- if index_L == (bins-1):
371
- # 可能是单边信号,数值反向
372
- lowess_smoothed_L_reverse = -lowess_smoothed_L[:,1]
373
- bool_L = lowess_smoothed_L_reverse<signal_threshold
374
- index_L = np.where(bool_L)[0][-1]
375
- # 考虑到 smooth,所以长度 + 1
376
- index_L = index_L - 1
377
- signal_L = lowess_smoothed_L[index_L+1:,1]
378
- length_L = (len(signal_L)*bin_size)/1000
379
- max_signal_L = y_L.max()
380
- y_max_L = max_signal_L*1.2
381
- left_region = chrom + ':' + df_bdg_chr_L.iloc[0,1].astype(str) + '-' + df_bdg_chr_L.iloc[-1,2].astype(str)
382
-
383
- ## right
384
- # 取 cleavage_site 附近的数据
385
- df_bdg_chr_R = df_bdg_chr[ (df_bdg_chr[start] <= cleavage_site+flank_max) & (df_bdg_chr[start]>=cleavage_site) ].copy()
386
- y_R = df_bdg_chr_R[value]
387
- n_bins_R = len(y_R)
388
- x_R = np.arange(n_bins_R)
389
- # 用 window_size 做临近
390
- frac = window_size/(bins*bin_size)
391
- lowess_smoothed_R = lowess(y_R[:bins], x_R[:bins], frac=frac)
392
- lowess_smoothed_R = lowess(lowess_smoothed_R[:, 1], lowess_smoothed_R[:, 0], frac=frac)
393
- # 得到第一个 >-signal_threshold 的 index
394
- bool_R = lowess_smoothed_R[:,1]>-signal_threshold
395
- index_R = np.where(bool_R)[0][0]
396
- if index_R == 0:
397
- # 可能是单边信号,数值反向
398
- lowess_smoothed_R_reverse = -lowess_smoothed_R[:,1]
399
- bool_R = lowess_smoothed_R_reverse>-signal_threshold
400
- index_R = np.where(bool_R)[0][0]
401
- # 考虑到 smooth,所以长度 + 1
402
- index_R = index_R + 1
403
- signal_R = lowess_smoothed_R[:index_R,1]
404
- length_R = (len(signal_R)*bin_size)/1000
405
- min_signal_R = y_R.min()
406
- y_mim_R = min_signal_R*1.2
407
- right_region = chrom + ':' + df_bdg_chr_R.iloc[0,1].astype(str) + '-' + df_bdg_chr_R.iloc[-1,2].astype(str)
408
-
409
- if show_plot:
410
- fig = plt.figure(figsize=(10, 3))
411
- ax1 = fig.add_axes([0.0, 0.1, 0.5, 0.8])
412
- ax2 = fig.add_axes([0.5, 0.1, 0.5, 0.8])
413
-
414
- # plot left
415
- ax1.plot(range(bins), y_L[-bins:], label='Original')
416
- ax1.plot(range(bins), lowess_smoothed_L[-bins:, 1], label='LOWESS', color='red')
417
- ax1.plot([0,bins],[0,0],label='zero',color='black')
418
- ax1.plot([0,bins],[signal_threshold,signal_threshold],label='threshold_left',color='orange')
419
- ax1.plot([0,bins],[-signal_threshold,-signal_threshold],label='threshold_right',color='orange')
420
- ax1.plot([index_L+1,index_L+1],[y_mim_R,y_max_L],label='length cutoff',color='orange')
421
- ax1.set_ylim(y_mim_R,y_max_L)
422
- ax1.set_xlim(-1,bins+1)
423
- ax1.set_xlabel('distance to cleavage site (kb)')
424
- ax1.set_title(left_region)
425
-
426
- # add xticks
427
- xtick_gap = 10000/bin_size # 10kb
428
- n_xticks = int(np.ceil(bins/xtick_gap))
429
- xticks = np.arange(0,n_xticks+1)*xtick_gap
430
- xticks_label = np.arange(0,n_xticks+1)*10
431
- xticks_label = np.flip(xticks_label)
432
- # add length cutoff into xticks
433
- # # 不加到xticks,可能会和原来的重合,改用text
434
- # xticks = np.append(xticks, index_L+1)
435
- # xticks_label = np.append(xticks_label, length_L)
436
- ax1.text(index_L-3, 3, f'{length_L:g} kb', ha='right', va='top')
437
- ax1.set_xticks(xticks)
438
- _ = ax1.set_xticklabels([f'{x:g}' for x in xticks_label])
439
- ax1.set_ylabel('signal difference\n(coverage per 10M reads)')
440
-
441
- # plot right
442
- ax2.plot(range(bins), y_R[:bins], label='Original')
443
- ax2.plot(range(bins), lowess_smoothed_R[:bins, 1], label='LOWESS', color='red')
444
- ax2.plot([0,bins],[0,0],label='zero',color='black')
445
- ax2.plot([0,bins],[signal_threshold,signal_threshold],label='threshold_left',color='orange')
446
- ax2.plot([0,bins],[-signal_threshold,-signal_threshold],label='threshold_right',color='orange')
447
- ax2.plot([index_R,index_R],[y_mim_R,y_max_L],label='length cutoff',color='orange')
448
- ax2.set_ylim(y_mim_R,y_max_L)
449
- ax2.set_xlim(-1,bins+1)
450
- ax2.set_xlabel('distance to cleavage site (kb)')
451
- ax2.set_title(right_region)
452
-
453
- # add xticks
454
- xtick_gap = 10000/bin_size # 10kb
455
- n_xticks = int(np.ceil(bins/xtick_gap))
456
- xticks = np.arange(0,n_xticks+1)*xtick_gap
457
- xticks_label = np.arange(0,n_xticks+1)*10
458
- # add length cutoff into xticks
459
- # # 不加到xticks,可能会和原来的重合,改用text
460
- # xticks = np.append(xticks, index_R)
461
- # xticks_label = np.append(xticks_label, length_R)
462
- ax2.text(index_R+4, -3, f'{length_R:g} kb', ha='left', va='bottom')
463
- ax2.set_xticks(xticks)
464
- _ = ax2.set_xticklabels([f'{x:g}' for x in xticks_label])
465
-
466
- # 左右两个图紧贴
467
- ax2.set_yticks([])
468
- ax2.set_yticklabels([])
469
- ax2.set_ylabel('')
470
- if savefig is not None:
471
- plt.savefig(savefig, dpi=save_dpi, bbox_inches='tight')
472
- #fig.tight_layout()
473
- plt.show()
474
- return length_L, length_R, lowess_smoothed_L, lowess_smoothed_R, y_L, y_R
475
-
476
-
477
352
  def tracking_plot(signal_L, signal_R, bin_size=100, bins=None,
478
353
  figsize=(10, 3), title='',
479
354
  show_plot=True, fig=None, ax1=None, ax2=None,
@@ -308,32 +308,3 @@ def target_signal_chunk(df_bdg_chr, df_alignment_chr, flank_max=100000, smooth_t
308
308
  return df_result
309
309
 
310
310
 
311
-
312
-
313
- # 2024.01.22. 额外写一个 signal length 算法,增加基于 pos_pct 而非 smooth 后的 overall_signal 的 length,叫 singal_length
314
- def signal_length(df_bdg_chr, chrom, cleavage_site, end='end',start='start',value='residual',
315
- flank_max=100000, binsize=100):
316
- # 输入数据必须是同一条染色体内的
317
- # Left
318
- df_bdg_chr_L = df_bdg_chr[ (df_bdg_chr[end] >= cleavage_site-flank_max) & (df_bdg_chr[end]<=cleavage_site) ].copy()
319
-
320
- # pos and neg
321
- df_bdg_chr_L_flank_pos = df_bdg_chr_L_flank[df_bdg_chr_L_flank[value] > 0]
322
- df_bdg_chr_L_flank_neg = df_bdg_chr_L_flank[df_bdg_chr_L_flank[value] <= 0]
323
- n_pos_left = len(df_bdg_chr_L_flank_pos)
324
- n_neg_left = len(df_bdg_chr_L_flank_neg)
325
- # avoid zero
326
- if n_pos_left == 0:
327
- pos_pct_left = 0
328
- else:
329
- pos_pct_left = n_pos_left/(n_pos_left+n_neg_left)
330
-
331
-
332
- df_bdg_chr_R = df_bdg_chr[ (df_bdg_chr[start] <= cleavage_site+flank_max) & (df_bdg_chr[start]>=cleavage_site) ].copy()
333
- # list_signal_residual_L 数值和之前类似
334
- list_signal_pct_L = []
335
- list_pct_score_L = []
336
- list_signal_residual_L = []
337
-
338
-
339
- return list_return
@@ -1,4 +1,4 @@
1
- __version__ = "2.10.3"
1
+ __version__ = "2.10.4"
2
2
  # 2023.08.11. v1.1.0 adding a option for not normalizing the bw file
3
3
  # 2023.10.26. v1.9.0 prerelease for v2.0
4
4
  # 2023.10.27. v2.0.0 大更新,还没微调
@@ -33,4 +33,4 @@ __version__ = "2.10.3"
33
33
  # 2025.04.25. v2.8.0 修复了 offtracker candidates 会把小写序列转换成 N 的 bug
34
34
  # 2025.05.22. v2.9.0 翻新部分代码结构
35
35
  # 2025.06.05. v2.10.0 增加了QC模块。保留了负数score的记录,并在plot时显示为红字。增加了 "--ignore_chr" 用于跳过common chr过滤。
36
- # 2025.06.17. v2.10.3 修复翻新代码结构导致的bug
36
+ # 2025.06.17. v2.10.4 修复翻新代码结构导致的bug
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: offtracker
3
- Version: 2.10.3
3
+ Version: 2.10.4
4
4
  Summary: Tracking-seq data analysis
5
5
  Home-page: https://github.com/Lan-lab/offtracker
6
6
  Author: Runda Xu
@@ -25,7 +25,7 @@ Offtracker is an end to end pipeline of Tracking-seq data analysis for detecting
25
25
  # We recommend creating a new environment using mamba/conda to avoid compatibility problems
26
26
  # If you don't use mamba, just replace the code with conda
27
27
  # Windows systems may not be compatible with pybedtools.
28
- mamba create -n offtracker -c bioconda blast snakemake pybedtools chromap
28
+ mamba create -n offtracker -c bioconda blast snakemake pybedtools deeptools chromap
29
29
  ```
30
30
 
31
31
 
@@ -26,7 +26,7 @@ def main():
26
26
  parser.add_argument('--name' , type=str, required=True, help='custom name of the sgRNA' )
27
27
  parser.add_argument('--exp' , type=str, default='all', nargs='+', help='A substring mark in the name of experimental samples. The default is to use all samples other than control' )
28
28
  parser.add_argument('--control' , type=str, default='none', nargs='+', help='A substring mark in the name of control samples. The default is no control. "others" for all samples other than --exp.' )
29
- parser.add_argument('--fdr' , type=int, default=0.05, help='FDR threshold for the final result. Default is 0.05.')
29
+ parser.add_argument('--fdr' , type=int, default=0.01, help='FDR threshold for the final result. Default is 0.01.')
30
30
  parser.add_argument('--score' , type=int, default=2, help='Track score threshold for the final result. Default is 2.')
31
31
  parser.add_argument('--smooth' , type=int, default=1, help='Smooth strength for the signal.')
32
32
  parser.add_argument('--window' , type=int, default=3, help='Window size for smoothing the signal.')
@@ -93,7 +93,7 @@ def main():
93
93
  all_sample_files = []
94
94
  for a_folder in folders:
95
95
  bdg_files = pd.Series(glob.glob(os.path.join( a_folder, '*.add.bdg' ))).sort_values().reset_index(drop=True)
96
- sample_names = bdg_files.apply(os.path.basename).str.extract('(.*)\.\d+\.add\.bdg',expand=False)
96
+ sample_names = bdg_files.apply(os.path.basename).str.extract(r'(.*)\.\d+\.add\.bdg',expand=False)
97
97
  all_sample_names.extend( sample_names )
98
98
  all_sample_files.extend( bdg_files )
99
99
  all_sample_files = pd.Series(all_sample_files)
@@ -209,7 +209,7 @@ def main():
209
209
  df_score = pd.read_csv(output, index_col=0)
210
210
  else:
211
211
  signal_files = pd.Series(glob.glob( os.path.join(outdir, 'temp', f'*{sgRNA_name}.signal.csv') ))
212
- signal_names = signal_files.apply(os.path.basename).str.extract(f'(.*)\.{sgRNA_name}\.signal\.csv',expand=False)
212
+ signal_names = signal_files.apply(os.path.basename).str.extract(rf'(.*)\.{sgRNA_name}\.signal\.csv',expand=False)
213
213
 
214
214
  # 读取并合并 samples
215
215
  list_df_exp_samples = []
@@ -287,7 +287,7 @@ def main():
287
287
  # 整理表格
288
288
  mean_seq_score = round(df_score['best_seq_score'].mean(),3)
289
289
  df_score['norm_best_seq_score'] = np.power(df_score['best_seq_score']/mean_seq_score, seq_score_power)
290
- df_score['final_score_1'] = df_score[f'proximal_signal']*df_score['norm_best_seq_score']
290
+ df_score['final_score_1'] = df_score['proximal_signal']*df_score['norm_best_seq_score']
291
291
  df_score['final_score_2'] = df_score['pct_score']*df_score['norm_best_seq_score']
292
292
  #df_score['final_score_2'] = df_score[f'overall_signal']*df_score['norm_best_seq_score']
293
293
  df_score['raw_score'] = df_score['final_score_1'] + df_score['final_score_2']
@@ -303,10 +303,10 @@ def main():
303
303
  score_bkg = df_result['raw_score'][n_outliers:-n_outliers]
304
304
  mean_score_bkg = score_bkg.mean()
305
305
  std_score_bkg = score_bkg.std()
306
- df_result['track_score'] = (df_result[f'raw_score'] - mean_score_bkg) / std_score_bkg
307
- df_result['track_score'] = df_result[f'track_score']*target_std + 1
306
+ df_result['track_score'] = (df_result['raw_score'] - mean_score_bkg) / std_score_bkg
307
+ df_result['track_score'] = df_result['track_score']*target_std + 1
308
308
  df_result = df_result.sort_values(by='track_score', ascending=False)
309
- df_result['log2_track_score'] = np.log2(df_result[f'track_score'].clip(lower=0.5))
309
+ df_result['log2_track_score'] = np.log2(df_result['track_score'].clip(lower=0.5))
310
310
 
311
311
  # 单边信号周围有更高分的,去掉
312
312
  # v2.1 后 cols_L, cols_R 要手动
@@ -345,7 +345,7 @@ def main():
345
345
  mu, std = norm.fit(score_for_fitting)
346
346
  print('mean_score:{:.3f};std:{:.3f}'.format(mu,std))
347
347
  # pv and fdr
348
- df_result['pv'] = df_result[f'log2_track_score'].apply( lambda x: norm.sf(x,loc=mu,scale=std) )
348
+ df_result['pv'] = df_result['log2_track_score'].apply( lambda x: norm.sf(x,loc=mu,scale=std) )
349
349
  df_result['pv'] = df_result['pv'].clip(lower=1e-320)
350
350
  df_result['fdr'] = offtracker.fdr(df_result['pv'])
351
351
  df_result['rank'] = range(1,len(df_result)+1)
@@ -354,7 +354,7 @@ def main():
354
354
  bool_fdr = df_result['fdr']<=fdr_thresh
355
355
  bool_score = df_result['track_score']>=score_thresh
356
356
  # 2025.06.05. BE可能会形成单边信号,导致 track_score 为负数,也保留
357
- bool_neg_score = df_result['track_score']<0
357
+ bool_neg_score = df_result['track_score']<-0.5
358
358
  df_output = df_result[bool_fdr|bool_score|bool_neg_score].copy()
359
359
  if pattern_ctr != 'none':
360
360
  df_output = df_output[['target_location', 'best_strand','best_target','deletion','insertion','mismatch',
@@ -13,6 +13,7 @@ import offtracker
13
13
  import offtracker.X_sequence as xseq
14
14
  script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
15
15
  utility_dir = os.path.join(script_dir, 'utility')
16
+ os.chmod( os.path.join(utility_dir, 'bedGraphToBigWig'), 0o755)
16
17
 
17
18
  ###
18
19
  parser = argparse.ArgumentParser()
File without changes
File without changes
File without changes
File without changes