offtracker 2.13.1__zip → 2.14.0__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {offtracker-2.13.1 → offtracker-2.14.0}/PKG-INFO +18 -4
- offtracker-2.13.1/offtracker.egg-info/PKG-INFO → offtracker-2.14.0/README.md +261 -259
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/_version.py +4 -2
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/snakefile/Snakefile_offtracker.smk +1 -1
- offtracker-2.13.1/README.md → offtracker-2.14.0/offtracker.egg-info/PKG-INFO +273 -247
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker.egg-info/requires.txt +1 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_analysis.py +18 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_candidates.py +14 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_config.py +37 -3
- {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_correction.py +59 -17
- {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_qc.py +15 -2
- {offtracker-2.13.1 → offtracker-2.14.0}/setup.py +1 -1
- {offtracker-2.13.1 → offtracker-2.14.0}/LICENSE.txt +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/MANIFEST.in +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/X_offplot.py +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/X_offtracker.py +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/X_sequence.py +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/__init__.py +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/snakefile/Snakefile_QC.smk +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/1.1_bed2fr.py +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/1.3_bdg_normalize_v4.0.py +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/bedGraphToBigWig +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/hg38.chrom.sizes +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/mm10.chrom.sizes +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/offtracker_blacklist_hg38.merged.bed +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/offtracker_blacklist_mm10.merged.bed +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker.egg-info/SOURCES.txt +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker.egg-info/dependency_links.txt +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/offtracker.egg-info/top_level.txt +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_init.py +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_plot.py +0 -0
- {offtracker-2.13.1 → offtracker-2.14.0}/setup.cfg +0 -0
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
# 2023.08.11. adding an option for not normalizing the bw file
|
|
5
5
|
# 2025.05.22. refine the structure
|
|
6
6
|
# 2025.06.05. 增加 ignore_chr 选项,默认只取 common chromosomes,用于 1.1_bed2fr.py
|
|
7
|
+
# 2025.10.05. 添加 threads 监测,并添加互动模式 --cpu_help
|
|
7
8
|
|
|
8
9
|
import argparse
|
|
9
10
|
import os, glob, yaml
|
|
@@ -36,8 +37,10 @@ def main():
|
|
|
36
37
|
parser.add_argument('--blacklist' , type=str, default='same', help='Blacklist of genome regions in bed format. "none" for no filter')
|
|
37
38
|
parser.add_argument('--binsize' , type=str, default=100, help='Bin size for calculating bw residue')
|
|
38
39
|
parser.add_argument('--normalize' , type=str, default='True', help='Whether to normalize the BigWig file. "True" or "False"')
|
|
39
|
-
parser.add_argument('--ignore_chr' , action='store_true',
|
|
40
|
-
|
|
40
|
+
parser.add_argument('--ignore_chr' , action='store_true', help='If not set, only chr1-chr22, chrX, chrY, chrM will be analyzed.')
|
|
41
|
+
parser.add_argument('--cpu_help' , action='store_true', help='Interactive mode to recommend the number of threads and cores according to available memory and CPUs.'
|
|
42
|
+
'-t/--thread will be reset to the recommended value in this mode.'
|
|
43
|
+
)
|
|
41
44
|
|
|
42
45
|
args = parser.parse_args()
|
|
43
46
|
|
|
@@ -74,6 +77,37 @@ def main():
|
|
|
74
77
|
|
|
75
78
|
assert not isinstance(sample_names, str), 'No fastq file is detected!'
|
|
76
79
|
|
|
80
|
+
|
|
81
|
+
#####################
|
|
82
|
+
# threads 监测和推荐 #
|
|
83
|
+
#####################
|
|
84
|
+
import psutil
|
|
85
|
+
if args.cpu_help:
|
|
86
|
+
cpu_count_total = psutil.cpu_count(logical=True) # 逻辑 CPU 总数(包括超线程)
|
|
87
|
+
memory = psutil.virtual_memory()
|
|
88
|
+
memory_available = round(memory.available/1024/1024/1024, 2) # 可用内存 GB
|
|
89
|
+
print('Total available memory:', memory_available, 'GB')
|
|
90
|
+
print('Total CPU threads:', cpu_count_total)
|
|
91
|
+
n_sample = len(sample_names)
|
|
92
|
+
print('Total samples:', n_sample)
|
|
93
|
+
# 用户输入分配的最大内存和CPU线程数
|
|
94
|
+
max_memory_gb = float(input(f"Please input the maximum memory for the program (GB): 25 - {memory_available}"))
|
|
95
|
+
max_cpu_threads = int(input(f"Please input the maximum CPU threads for the program: 1 - {cpu_count_total}"))
|
|
96
|
+
assert (max_memory_gb < memory_available)&(max_memory_gb >= 25), f'max memory must be < available memory ({memory_available} GB) and >= 25 GB, current input: {max_memory_gb} GB'
|
|
97
|
+
assert (max_cpu_threads <= cpu_count_total)&(max_cpu_threads >= 1), f'max cpu threads must be <= total cpu threads ({cpu_count_total}) and >= 1, current input: {max_cpu_threads}'
|
|
98
|
+
# 计算推荐的 cpu 参数
|
|
99
|
+
max_task = min(int(max(max_memory_gb,30)/30), n_sample)
|
|
100
|
+
max_cpu_per_task = int(max_cpu_threads/max_task)
|
|
101
|
+
total_cpu = max_task*max_cpu_per_task
|
|
102
|
+
|
|
103
|
+
print('Assigning', max_cpu_per_task, f'CPU threads to each task. (i.e., -t {max_cpu_per_task})')
|
|
104
|
+
print('Number of parallel tasks:', max_task)
|
|
105
|
+
print(f'Please specify "--cores {total_cpu}" when formally running snakemake.')
|
|
106
|
+
|
|
107
|
+
n_threads = max_cpu_per_task
|
|
108
|
+
else:
|
|
109
|
+
n_threads = args.thread
|
|
110
|
+
|
|
77
111
|
dict_yaml = {
|
|
78
112
|
# fastq 信息
|
|
79
113
|
'files_R1':dict(zip(sample_names,files_R1)),
|
|
@@ -82,7 +116,7 @@ def main():
|
|
|
82
116
|
'input_dir':args.folder,
|
|
83
117
|
'output_dir':args.outdir,
|
|
84
118
|
# 运行参数
|
|
85
|
-
'thread':
|
|
119
|
+
'thread':n_threads,
|
|
86
120
|
'index':args.index,
|
|
87
121
|
'fasta':args.ref,
|
|
88
122
|
'binsize':args.binsize,
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
2
3
|
|
|
3
4
|
import polars as pl
|
|
4
5
|
import pandas as pd
|
|
@@ -35,7 +36,7 @@ def main():
|
|
|
35
36
|
parser.add_argument('--pam' , type=str, required=True, help='The protospacer adjacent motif' )
|
|
36
37
|
parser.add_argument('--pam_location', type=str, default='downstream', help='Upstream or downstream, default is downstream (Cas9)' )
|
|
37
38
|
# not used
|
|
38
|
-
parser.add_argument('--seqfolder' , type=str,
|
|
39
|
+
parser.add_argument('--seqfolder' , type=str, default='none', help='Actually not used in this script.Only in case you forget to remove this argument.')
|
|
39
40
|
|
|
40
41
|
args = parser.parse_args()
|
|
41
42
|
# 2025.08.08. 增加对阳性位点的 target_location 重比对功能,避免 blast 比对后的 realign 在更大范围内的存在不准确的情况
|
|
@@ -53,7 +54,7 @@ def main():
|
|
|
53
54
|
score_thresh = args.score
|
|
54
55
|
binsize = args.binsize
|
|
55
56
|
flank_max = args.flank_max
|
|
56
|
-
flank_regions = args.flank_regions
|
|
57
|
+
flank_regions = args.flank_regions # 如果 analysis 时修改了这个参数没有写 1000 的话会出bug,暂时懒得改了
|
|
57
58
|
smooth_times = args.smooth
|
|
58
59
|
window_size = args.window
|
|
59
60
|
seq_score_power = args.SeqScorePower
|
|
@@ -139,8 +140,8 @@ def main():
|
|
|
139
140
|
ctr_sample_files = all_sample_files[bool_ctr]
|
|
140
141
|
exp_sample_names = all_sample_names[bool_exp]
|
|
141
142
|
ctr_sample_names = all_sample_names[bool_ctr]
|
|
142
|
-
selected_sample_files = pd.concat([exp_sample_files,ctr_sample_files])
|
|
143
|
-
selected_sample_names = pd.concat([exp_sample_names,ctr_sample_names]) # no use
|
|
143
|
+
# selected_sample_files = pd.concat([exp_sample_files,ctr_sample_files])
|
|
144
|
+
# selected_sample_names = pd.concat([exp_sample_names,ctr_sample_names]) # no use
|
|
144
145
|
|
|
145
146
|
|
|
146
147
|
|
|
@@ -154,8 +155,17 @@ def main():
|
|
|
154
155
|
PAM = args.pam
|
|
155
156
|
PAM_loc = args.pam_location
|
|
156
157
|
# read result
|
|
157
|
-
dp_result = pl.read_csv(f'
|
|
158
|
-
|
|
158
|
+
dp_result = pl.read_csv(f'./temp/df_result_{outname}.csv')
|
|
159
|
+
# negative for next section
|
|
160
|
+
bool_fdr_bkg = dp_result['fdr']>fdr_thresh
|
|
161
|
+
bool_score_bkg = dp_result['track_score']<score_thresh
|
|
162
|
+
dp_result_bkg = dp_result.filter(bool_fdr_bkg & bool_score_bkg)
|
|
163
|
+
# positive
|
|
164
|
+
bool_fdr = pl.col('fdr')<=fdr_thresh
|
|
165
|
+
bool_score = pl.col('track_score')>=score_thresh
|
|
166
|
+
dp_result = dp_result.filter(bool_fdr & bool_score)
|
|
167
|
+
# bdg
|
|
168
|
+
dp_bdg = pl.read_csv(exp_sample_files.iloc[0], separator='\t', has_header=False,
|
|
159
169
|
schema_overrides={'chr':pl.String,'start':pl.Int32,'end':pl.Int32,'residual':pl.Float32})
|
|
160
170
|
# check and realign
|
|
161
171
|
bool_left_neg=(dp_result['exp_L_neg_1000']<-5)&(dp_result['exp_R_neg_1000']==0)
|
|
@@ -163,11 +173,13 @@ def main():
|
|
|
163
173
|
list_good_result = []
|
|
164
174
|
list_bad_left = []
|
|
165
175
|
list_bad_right = []
|
|
176
|
+
n_left_for_correct = 0
|
|
177
|
+
n_right_for_correct = 0
|
|
166
178
|
for a_left_bool, a_right_bool, a_row in zip(bool_left_neg, bool_right_neg, dp_result.iter_rows(named=True)):
|
|
167
179
|
if a_left_bool & a_right_bool:
|
|
168
180
|
raise ValueError('abnormal on both left and right')
|
|
169
181
|
if a_left_bool:
|
|
170
|
-
|
|
182
|
+
n_left_for_correct += 1
|
|
171
183
|
loc_shift_left = a_row['chr'] + ':' + str(a_row['st']-1000) + '-' + str(a_row['ed']-20)
|
|
172
184
|
region_index = a_row['region_index']
|
|
173
185
|
dp_bdg_chr = dp_bdg.filter(pl.col('chr') == a_row['chr'])
|
|
@@ -175,7 +187,7 @@ def main():
|
|
|
175
187
|
sr_candidate.loc['region_index'] = region_index
|
|
176
188
|
list_bad_left.append(sr_candidate)
|
|
177
189
|
elif a_right_bool:
|
|
178
|
-
|
|
190
|
+
n_right_for_correct += 1
|
|
179
191
|
loc_shift_right = a_row['chr'] + ':' + str(a_row['st']+20) + '-' + str(a_row['ed']+1000)
|
|
180
192
|
region_index = a_row['region_index']
|
|
181
193
|
dp_bdg_chr = dp_bdg.filter(pl.col('chr') == a_row['chr'])
|
|
@@ -188,17 +200,50 @@ def main():
|
|
|
188
200
|
df_cand_left = pd.DataFrame(list_bad_left)
|
|
189
201
|
df_cand_right = pd.DataFrame(list_bad_right)
|
|
190
202
|
df_cand_realign = pd.concat([df_cand_left, df_cand_right])
|
|
203
|
+
if len(df_cand_realign) == 0:
|
|
204
|
+
print('No candidate is found for realignment.')
|
|
205
|
+
return 'finished'
|
|
191
206
|
|
|
207
|
+
# 情况判断
|
|
208
|
+
n_success_realign = sum(df_cand_realign['realign']=='success')
|
|
209
|
+
n_fail_realign = sum(df_cand_realign['realign']!='success')
|
|
210
|
+
if (n_success_realign == 0) and (n_fail_realign > 0):
|
|
211
|
+
print(f'{n_fail_realign} candidates are found for realignment, but all failed.')
|
|
212
|
+
return 'finished'
|
|
213
|
+
elif (n_success_realign > 0) and (n_fail_realign > 0):
|
|
214
|
+
print(f'{n_success_realign} candidates succeeded, and {n_fail_realign} candidates failed.')
|
|
215
|
+
else:
|
|
216
|
+
print(f'{n_success_realign} candidates succeeded.')
|
|
217
|
+
|
|
218
|
+
df_cand_realign = df_cand_realign[df_cand_realign['realign']=='success']
|
|
192
219
|
seqfile = rf'correction_df_candidate_{outname}_realign.csv'
|
|
193
220
|
df_cand_realign.to_csv(seqfile)
|
|
194
221
|
|
|
195
222
|
# run offtracker_analysis with check_loc mode
|
|
196
223
|
running_log = rf'correction_analysis_{outname}.log'
|
|
224
|
+
# list 转空格分割参数
|
|
225
|
+
if isinstance(pattern_exp, list):
|
|
226
|
+
param_pattern_exp = ' '.join(pattern_exp)
|
|
227
|
+
else:
|
|
228
|
+
param_pattern_exp = pattern_exp
|
|
229
|
+
if isinstance(pattern_ctr, list):
|
|
230
|
+
param_pattern_ctr = ' '.join(pattern_ctr)
|
|
231
|
+
else:
|
|
232
|
+
param_pattern_ctr = pattern_ctr
|
|
233
|
+
if isinstance(flank_regions, list):
|
|
234
|
+
param_flank_regions = ' '.join([str(x) for x in flank_regions])
|
|
235
|
+
else:
|
|
236
|
+
param_flank_regions = flank_regions
|
|
237
|
+
if isinstance(folders, list):
|
|
238
|
+
param_folders = ' '.join([str(x) for x in folders])
|
|
239
|
+
else:
|
|
240
|
+
param_folders = folders
|
|
241
|
+
|
|
197
242
|
with open(running_log, "w+") as running_log:
|
|
198
243
|
command = f'offtracker_analysis.py -t {args.thread} -g {args.genome} --seqfile {seqfile} --name {sgRNA_name} \
|
|
199
|
-
--exp {
|
|
244
|
+
--exp {param_pattern_exp} --control {param_pattern_ctr} --outname {outname}_loc_correction -f {param_folders} -o {outdir} \
|
|
200
245
|
--fdr {fdr_thresh} --window {window_size} --smooth {smooth_times} --SeqScorePower {seq_score_power} \
|
|
201
|
-
--score {score_thresh} --binsize {binsize} --flank_max {flank_max} --flank_regions {
|
|
246
|
+
--score {score_thresh} --binsize {binsize} --flank_max {flank_max} --flank_regions {param_flank_regions} --CtrClip {ctr_clip} \
|
|
202
247
|
--check_loc'
|
|
203
248
|
command2 = shlex.split('bash -c "{}"'.format(command))
|
|
204
249
|
process_1 = subprocess.Popen(command2, stdout=running_log, stderr=subprocess.STDOUT )
|
|
@@ -213,10 +258,7 @@ def main():
|
|
|
213
258
|
#######################
|
|
214
259
|
## recalculate score ##
|
|
215
260
|
#######################
|
|
216
|
-
|
|
217
|
-
bool_fdr_bkg = dp_result_bkg['fdr']>fdr_thresh
|
|
218
|
-
bool_score_bkg = dp_result_bkg['track_score']<score_thresh
|
|
219
|
-
dp_result_bkg = dp_result_bkg.filter(bool_fdr_bkg & bool_score_bkg)
|
|
261
|
+
|
|
220
262
|
dp_result_realign = pl.read_csv(f'./temp/df_result_{outname}_loc_correction.csv')
|
|
221
263
|
|
|
222
264
|
# 兼容旧版输出列名
|
|
@@ -256,7 +298,7 @@ def main():
|
|
|
256
298
|
# output Offtracker result
|
|
257
299
|
bool_fdr = pl.col('fdr')<=fdr_thresh
|
|
258
300
|
bool_score = pl.col('track_score')>=score_thresh
|
|
259
|
-
dp_output = dp_result_new.filter(bool_fdr|bool_score)
|
|
301
|
+
dp_output = dp_result_new.filter(bool_fdr|bool_score)
|
|
260
302
|
if pattern_ctr != 'none':
|
|
261
303
|
dp_output = dp_output[['target_location', 'best_strand','best_target','deletion','insertion','mismatch',
|
|
262
304
|
'exp_L_length', 'exp_R_length','ctr_L_length','ctr_R_length','L_length','R_length','signal_length',
|
|
@@ -271,7 +313,7 @@ def main():
|
|
|
271
313
|
dp_output.columns = ['target_location', 'strand', 'target', 'deletion', 'insertion', 'mismatch',
|
|
272
314
|
'L_length', 'R_length','signal_length',
|
|
273
315
|
'seq_score', 'track_score', 'log2_track_score','FDR', 'rank']
|
|
274
|
-
|
|
316
|
+
dp_output.write_csv(f'Offtracker_result_{outname}.csv')
|
|
275
317
|
|
|
276
318
|
return 'correction finished'
|
|
277
319
|
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
|
|
4
|
-
THIS_VERSION = '0.4.
|
|
4
|
+
THIS_VERSION = '0.4.2'
|
|
5
|
+
|
|
6
|
+
# 2025.10.05. 0.4.2. 添加 threads 监测
|
|
5
7
|
|
|
6
8
|
import argparse
|
|
7
9
|
import os, glob, yaml
|
|
@@ -50,6 +52,17 @@ def main():
|
|
|
50
52
|
|
|
51
53
|
assert not isinstance(sample_names, str), 'No fastq file is detected!'
|
|
52
54
|
|
|
55
|
+
################
|
|
56
|
+
# threads 监测 #
|
|
57
|
+
################
|
|
58
|
+
import psutil
|
|
59
|
+
n_threads = args.thread
|
|
60
|
+
assert n_threads > 0, f'n_threads should be greater than 0, while {n_threads} is given.'
|
|
61
|
+
cpu_count_total = psutil.cpu_count(logical=True) # 逻辑 CPU 总数(包括超线程)
|
|
62
|
+
if n_threads > cpu_count_total:
|
|
63
|
+
n_threads = cpu_count_total-1
|
|
64
|
+
print(f'n_threads is reset to {n_threads} due to the total number of threads ({cpu_count_total}).')
|
|
65
|
+
|
|
53
66
|
dict_yaml = {
|
|
54
67
|
# fastq 信息
|
|
55
68
|
'files_R1':dict(zip(sample_names,files_R1)),
|
|
@@ -58,7 +71,7 @@ def main():
|
|
|
58
71
|
'input_dir':args.folder,
|
|
59
72
|
'output_dir':args.outdir,
|
|
60
73
|
# 运行参数
|
|
61
|
-
'thread':
|
|
74
|
+
'thread':n_threads,
|
|
62
75
|
'utility_dir':utility_dir
|
|
63
76
|
}
|
|
64
77
|
|
|
@@ -26,7 +26,7 @@ with open(os.path.join(here, package_folder, '_version.py'),'r',encoding='utf-8'
|
|
|
26
26
|
|
|
27
27
|
# requirements
|
|
28
28
|
REQUIRED = [
|
|
29
|
-
'pandas', 'polars>=1.19.0', 'numpy', 'biopython<=1.85', 'pybedtools', 'pyyaml',
|
|
29
|
+
'pandas', 'polars>=1.19.0', 'numpy', 'biopython<=1.85', 'pybedtools', 'pyyaml', 'psutil'
|
|
30
30
|
]
|
|
31
31
|
## pybedtools may be not supported in Windows
|
|
32
32
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/offtracker_blacklist_hg38.merged.bed
RENAMED
|
File without changes
|
{offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/offtracker_blacklist_mm10.merged.bed
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|