offtracker 2.11.3__zip → 2.12.0__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {offtracker-2.11.3/offtracker.egg-info → offtracker-2.12.0}/PKG-INFO +1 -1
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/X_offtracker.py +58 -18
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/_version.py +4 -2
- {offtracker-2.11.3 → offtracker-2.12.0/offtracker.egg-info}/PKG-INFO +1 -1
- {offtracker-2.11.3 → offtracker-2.12.0}/scripts/offtracker_analysis.py +37 -31
- {offtracker-2.11.3 → offtracker-2.12.0}/scripts/offtracker_candidates.py +15 -10
- {offtracker-2.11.3 → offtracker-2.12.0}/LICENSE.txt +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/MANIFEST.in +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/README.md +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/X_offplot.py +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/X_sequence.py +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/__init__.py +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/snakefile/Snakefile_QC.smk +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/snakefile/Snakefile_offtracker.smk +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/utility/1.1_bed2fr.py +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/utility/1.3_bdg_normalize_v4.0.py +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/utility/bedGraphToBigWig +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/utility/hg38.chrom.sizes +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/utility/mm10.chrom.sizes +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/utility/offtracker_blacklist_hg38.merged.bed +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker/utility/offtracker_blacklist_mm10.merged.bed +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker.egg-info/SOURCES.txt +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker.egg-info/dependency_links.txt +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker.egg-info/requires.txt +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/offtracker.egg-info/top_level.txt +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/scripts/offtracker_config.py +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/scripts/offtracker_init.py +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/scripts/offtracker_plot.py +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/scripts/offtracker_qc.py +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/setup.cfg +0 -0
- {offtracker-2.11.3 → offtracker-2.12.0}/setup.py +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
|
|
2
2
|
import pandas as pd
|
|
3
|
+
import polars as pl
|
|
3
4
|
import numpy as np
|
|
4
5
|
import os, sys
|
|
5
6
|
sys.path.append( os.path.abspath(os.path.dirname(__file__)) )
|
|
@@ -8,26 +9,65 @@ def fdr(p_vals):
|
|
|
8
9
|
# Benjamini-Hochberg
|
|
9
10
|
from scipy.stats import rankdata
|
|
10
11
|
ranked_p_values = rankdata(p_vals)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
return
|
|
12
|
+
fdr_value = p_vals * len(p_vals) / ranked_p_values
|
|
13
|
+
fdr_value[fdr_value > 1] = 1
|
|
14
|
+
return fdr_value
|
|
14
15
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
for
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
16
|
+
|
|
17
|
+
def mark_regions_single_chr(dp, min_distance=1000):
    """Label every row of a single-chromosome table with a merged-region ID.

    Rows are scanned in the order they appear in ``dp``. A row joins the
    current region when its ``st`` is no further than ``min_distance`` past
    the largest ``ed`` seen so far in that region; otherwise it opens a new
    region. Labels have the form ``"<chr>_region_<n>"`` with ``n`` starting
    at 1.

    NOTE(review): the scan never sorts, so the grouping presumably relies on
    the caller passing rows ordered by ``st`` ascending -- confirm against
    the caller.

    Parameters
    ----------
    dp : polars.DataFrame
        Table with ``chr``, ``st`` and ``ed`` columns; all rows must share a
        single ``chr`` value.
    min_distance : int, default 1000
        Largest gap between the running region end and the next ``st`` for
        the row to still be counted as part of the same region.

    Returns
    -------
    polars.DataFrame
        ``dp`` with an added string column ``region_index``.

    Raises
    ------
    ValueError
        If ``dp`` contains more than one distinct ``chr`` value.
    """
    chroms = dp['chr'].unique()
    if len(chroms) == 0:
        # Nothing to label; still add the column so the output schema is
        # the same as for non-empty input.
        return dp.with_columns(region_index=pl.Series(values=[], dtype=pl.Utf8))
    if len(chroms) != 1:
        # Explicit exception instead of ``assert`` so the check survives -O.
        raise ValueError(
            'mark_regions_single_chr expects exactly one chromosome, '
            f'got {len(chroms)}'
        )
    chrom = chroms[0]

    region_id = 1
    current_end = None  # largest 'ed' seen so far in the current region
    marked_regions = []

    for row in dp.iter_rows(named=True):
        start, end = row['st'], row['ed']

        if current_end is None:
            # First row opens region 1.
            current_end = end
        elif start <= current_end + min_distance:
            # Close enough: same region, extend its right edge if needed.
            current_end = max(current_end, end)
        else:
            # Gap is too large: start a new region at this row.
            region_id += 1
            current_end = end

        marked_regions.append(f'{chrom}_region_{region_id}')

    return dp.with_columns(region_index=pl.Series(marked_regions))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# def dedup_two( df_loc, col_ID_1='ID_1', col_ID_2='ID_2'):
|
|
56
|
+
# # 会根据 df_loc 的排序保留第一个 location
|
|
57
|
+
# # dedup 结束后,剩下的 ID_1 + ID_2 并集可能会小于 dedup 前的并集
|
|
58
|
+
# list_nondup = []
|
|
59
|
+
# set_IDs = set()
|
|
60
|
+
# df_IDs = df_loc[[col_ID_1,col_ID_2]]
|
|
61
|
+
# for a_row in df_IDs.iterrows():
|
|
62
|
+
# temp = a_row[1]
|
|
63
|
+
# if (temp[col_ID_1] in set_IDs) or (temp[col_ID_2] in set_IDs):
|
|
64
|
+
# # 只要有一ID出现过,即便另一ID没出现过,也不更新 set_IDs
|
|
65
|
+
# list_nondup.append(False)
|
|
66
|
+
# else:
|
|
67
|
+
# set_IDs.add(temp[col_ID_1])
|
|
68
|
+
# set_IDs.add(temp[col_ID_2])
|
|
69
|
+
# list_nondup.append(True)
|
|
70
|
+
# return list_nondup
|
|
31
71
|
|
|
32
72
|
def window_smooth(sr_smooth, window_size=3, times=1):
|
|
33
73
|
window = np.ones(window_size) / window_size
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
__version__ = "2.11.3"
|
|
1
|
+
__version__ = "2.12.0"
|
|
2
2
|
# 2023.08.11. v1.1.0 adding a option for not normalizing the bw file
|
|
3
3
|
# 2023.10.26. v1.9.0 prerelease for v2.0
|
|
4
4
|
# 2023.10.27. v2.0.0 大更新,还没微调
|
|
@@ -38,4 +38,6 @@ __version__ = "2.11.3"
|
|
|
38
38
|
# 2025.06.28. v2.10.9 现在 pip 都是从 wheel 安装,不再运行 setup.py,所以增加一个 offtracker_init.py
|
|
39
39
|
# 2025.06.28. v2.10.10 直接塞 script 里试试
|
|
40
40
|
# 2025.06.28. v2.10.11 回滚到2.10.9外加修正
|
|
41
|
-
# 2025.07.02. v2.11.
|
|
41
|
+
# 2025.07.02. v2.11.4 基于 blast 的缺陷更新 candidates,去除 quick mode
|
|
42
|
+
# 2025.07.04. v2.11.5 offtracker_analysis 提前 skip 已有结果的样本
|
|
43
|
+
# 2025.07.04. v2.12.0 新增 region_index 标记区域,用于更好的去重
|
|
@@ -82,7 +82,7 @@ def main():
|
|
|
82
82
|
df_candidate.index = df_candidate['target_location']
|
|
83
83
|
df_candidate_brief = df_candidate[['chr','st','ed','best_strand','best_target','best_seq_score',
|
|
84
84
|
'deletion', 'insertion','mismatch', 'GG',
|
|
85
|
-
'target_location', 'cleavage_site', 'ID_1','ID_2']]
|
|
85
|
+
'target_location', 'cleavage_site', 'ID_1','ID_2', 'region_index']] # 2025.07.06 添加 region_index
|
|
86
86
|
df_candidate_sub = df_candidate[['chr','cleavage_site']]
|
|
87
87
|
except FileNotFoundError:
|
|
88
88
|
return 'Please run offtracker_candidates.py first and provide the correct directory with --seqfolder'
|
|
@@ -192,7 +192,8 @@ def main():
|
|
|
192
192
|
|
|
193
193
|
if args.signal_only:
|
|
194
194
|
return 'signal_only is on, stop here.'
|
|
195
|
-
|
|
195
|
+
|
|
196
|
+
|
|
196
197
|
####################
|
|
197
198
|
## group analysis ##
|
|
198
199
|
####################
|
|
@@ -204,6 +205,11 @@ def main():
|
|
|
204
205
|
else:
|
|
205
206
|
outname = args.outname
|
|
206
207
|
|
|
208
|
+
# skip finished
|
|
209
|
+
output = f'Offtracker_result_{outname}.csv'
|
|
210
|
+
if (os.path.isfile(output))&(not args.overwrite):
|
|
211
|
+
return 'skip {output} as the result exists!'
|
|
212
|
+
|
|
207
213
|
output = f'./temp/df_score_{outname}.csv'
|
|
208
214
|
if (os.path.isfile(output))&(not args.overwrite):
|
|
209
215
|
print(f'skip {output}')
|
|
@@ -294,9 +300,12 @@ def main():
|
|
|
294
300
|
df_score['raw_score'] = df_score['final_score_1'] + df_score['final_score_2']
|
|
295
301
|
df_score = df_score.sort_values('raw_score', ascending=False)
|
|
296
302
|
|
|
297
|
-
# local dedup
|
|
298
|
-
list_nondup = offtracker.dedup_two(df_score,'ID_1','ID_2')
|
|
299
|
-
df_result = df_score[list_nondup].copy()
|
|
303
|
+
# # local dedup
|
|
304
|
+
# list_nondup = offtracker.dedup_two(df_score,'ID_1','ID_2')
|
|
305
|
+
# df_result = df_score[list_nondup].copy()
|
|
306
|
+
|
|
307
|
+
# 2025.07.06 更新去重方式
|
|
308
|
+
df_result = df_score.drop_duplicates(subset=['region_index'], keep='first')
|
|
300
309
|
|
|
301
310
|
# 标准化分布
|
|
302
311
|
target_std=0.15
|
|
@@ -353,33 +362,30 @@ def main():
|
|
|
353
362
|
df_result.to_csv(output)
|
|
354
363
|
|
|
355
364
|
output = f'Offtracker_result_{outname}.csv'
|
|
356
|
-
|
|
357
|
-
|
|
365
|
+
# 2024.06.03. 以防 fdr<=fdr_thresh 滤掉了 track_score>=2 的位点
|
|
366
|
+
bool_fdr = df_result['fdr']<=fdr_thresh
|
|
367
|
+
bool_score = df_result['track_score']>=score_thresh
|
|
368
|
+
# 2025.06.05. BE可能会形成单边信号,导致 track_score 为负数,也保留
|
|
369
|
+
bool_neg_score = df_result['track_score']< -1
|
|
370
|
+
df_output = df_result[bool_fdr|bool_score|bool_neg_score].copy()
|
|
371
|
+
if pattern_ctr != 'none':
|
|
372
|
+
df_output = df_output[['target_location', 'best_strand','best_target','deletion','insertion','mismatch',
|
|
373
|
+
'exp_L_length', 'exp_R_length','ctr_L_length','ctr_R_length','L_length','R_length','signal_length',
|
|
374
|
+
'norm_best_seq_score','track_score', 'log2_track_score','fdr','rank']]
|
|
375
|
+
df_output.columns = ['target_location', 'strand', 'target', 'deletion', 'insertion', 'mismatch',
|
|
376
|
+
'exp_L_length', 'exp_R_length','ctr_L_length','ctr_R_length','L_length','R_length','signal_length',
|
|
377
|
+
'seq_score', 'track_score', 'log2_track_score','FDR', 'rank']
|
|
358
378
|
else:
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
df_output.columns = ['target_location', 'strand', 'target', 'deletion', 'insertion', 'mismatch',
|
|
370
|
-
'exp_L_length', 'exp_R_length','ctr_L_length','ctr_R_length','L_length','R_length','signal_length',
|
|
371
|
-
'seq_score', 'track_score', 'log2_track_score','FDR', 'rank']
|
|
372
|
-
else:
|
|
373
|
-
df_output = df_output[['target_location', 'best_strand','best_target','deletion','insertion','mismatch',
|
|
374
|
-
'L_length', 'R_length','signal_length',
|
|
375
|
-
'norm_best_seq_score','track_score', 'log2_track_score','fdr','rank']]
|
|
376
|
-
df_output.columns = ['target_location', 'strand', 'target', 'deletion', 'insertion', 'mismatch',
|
|
377
|
-
'L_length', 'R_length','signal_length',
|
|
378
|
-
'seq_score', 'track_score', 'log2_track_score','FDR', 'rank']
|
|
379
|
-
df_output.to_csv(f'Offtracker_result_{outname}.csv', index=False)
|
|
380
|
-
|
|
381
|
-
if args.clean:
|
|
382
|
-
shutil.rmtree('./temp')
|
|
379
|
+
df_output = df_output[['target_location', 'best_strand','best_target','deletion','insertion','mismatch',
|
|
380
|
+
'L_length', 'R_length','signal_length',
|
|
381
|
+
'norm_best_seq_score','track_score', 'log2_track_score','fdr','rank']]
|
|
382
|
+
df_output.columns = ['target_location', 'strand', 'target', 'deletion', 'insertion', 'mismatch',
|
|
383
|
+
'L_length', 'R_length','signal_length',
|
|
384
|
+
'seq_score', 'track_score', 'log2_track_score','FDR', 'rank']
|
|
385
|
+
df_output.to_csv(f'Offtracker_result_{outname}.csv', index=False)
|
|
386
|
+
|
|
387
|
+
if args.clean:
|
|
388
|
+
shutil.rmtree('./temp')
|
|
383
389
|
|
|
384
390
|
return 'Done!'
|
|
385
391
|
|
|
@@ -37,7 +37,7 @@ def main():
|
|
|
37
37
|
parser.add_argument('-o','--outdir' , type=str, required=True, help='The output folder')
|
|
38
38
|
parser.add_argument('-g','--genome' , type=str, default='hg38', help='File of chromosome sizes, or "hg38", "mm10" ')
|
|
39
39
|
parser.add_argument('-t','--thread' , type=int, default=4, help='Number of threads for parallel computing')
|
|
40
|
-
parser.add_argument('--quick_mode' , action='store_true', help='
|
|
40
|
+
# parser.add_argument('--quick_mode' , action='store_true', help='Quick mode is deprecated due to blast flaw.')
|
|
41
41
|
|
|
42
42
|
args = parser.parse_args()
|
|
43
43
|
|
|
@@ -57,7 +57,7 @@ def main():
|
|
|
57
57
|
os.makedirs(dir_output)
|
|
58
58
|
dir_ref_fa = args.ref
|
|
59
59
|
blast_db = args.blastdb
|
|
60
|
-
quick_mode = args.quick_mode
|
|
60
|
+
# quick_mode = args.quick_mode
|
|
61
61
|
|
|
62
62
|
# parameters for alignment
|
|
63
63
|
half_width = 100
|
|
@@ -95,13 +95,7 @@ def main():
|
|
|
95
95
|
if os.path.isfile(dir_sgRNA_blast):
|
|
96
96
|
print(f'{dir_sgRNA_blast} exists, skipped.')
|
|
97
97
|
else:
|
|
98
|
-
|
|
99
|
-
print('Using quick mode for BLAST')
|
|
100
|
-
blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
|
|
101
|
-
db=blast_db, evalue=100000,outfmt=6, num_threads=n_threads,
|
|
102
|
-
gapopen=4, gapextend=2, reward=2, word_size=5, dust='no', soft_masking=False)
|
|
103
|
-
else:
|
|
104
|
-
blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
|
|
98
|
+
blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
|
|
105
99
|
db=blast_db, evalue=100000,outfmt=6, num_threads=n_threads,
|
|
106
100
|
gapopen=4, gapextend=2, reward=2, word_size=4, dust='no', soft_masking=False)
|
|
107
101
|
print(f'BLAST for candidate off-target sites of {sgRNA_name}.')
|
|
@@ -316,7 +310,18 @@ def main():
|
|
|
316
310
|
df_candidate['mis_all'] = df_candidate[['mismatch','deletion','insertion']].sum(axis=1)
|
|
317
311
|
df_candidate = df_candidate[df_candidate['mis_all']<8]
|
|
318
312
|
|
|
319
|
-
|
|
313
|
+
# 2025.07.06 增加 region 标记用于去重
|
|
314
|
+
# 将 df_candidate 按照染色体分组
|
|
315
|
+
candidate_groups = df_candidate.groupby('chr')
|
|
316
|
+
# 定义一个空的列表,用于存储每个染色体的数据
|
|
317
|
+
list_dp = []
|
|
318
|
+
for chr_name, chr_candidate in candidate_groups:
|
|
319
|
+
dp_marked = offtracker.mark_regions_single_chr(pl.DataFrame(chr_candidate))
|
|
320
|
+
list_dp.append(dp_marked)
|
|
321
|
+
df_candidate = pl.concat(list_dp)
|
|
322
|
+
|
|
323
|
+
# 改成 pl 输出
|
|
324
|
+
df_candidate.write_csv(dir_df_candidate)
|
|
320
325
|
print(f'Output df_candidate_{sgRNA_name}.csv')
|
|
321
326
|
os.remove(temp_bed)
|
|
322
327
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{offtracker-2.11.3 → offtracker-2.12.0}/offtracker/utility/offtracker_blacklist_hg38.merged.bed
RENAMED
|
File without changes
|
{offtracker-2.11.3 → offtracker-2.12.0}/offtracker/utility/offtracker_blacklist_mm10.merged.bed
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|