offtracker 2.13.1__zip → 2.13.2__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {offtracker-2.13.1/offtracker.egg-info → offtracker-2.13.2}/PKG-INFO +1 -1
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/_version.py +3 -2
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/snakefile/Snakefile_offtracker.smk +1 -1
- {offtracker-2.13.1 → offtracker-2.13.2/offtracker.egg-info}/PKG-INFO +1 -1
- {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_correction.py +59 -17
- {offtracker-2.13.1 → offtracker-2.13.2}/LICENSE.txt +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/MANIFEST.in +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/README.md +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/X_offplot.py +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/X_offtracker.py +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/X_sequence.py +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/__init__.py +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/snakefile/Snakefile_QC.smk +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/1.1_bed2fr.py +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/1.3_bdg_normalize_v4.0.py +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/bedGraphToBigWig +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/hg38.chrom.sizes +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/mm10.chrom.sizes +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/offtracker_blacklist_hg38.merged.bed +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/offtracker_blacklist_mm10.merged.bed +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker.egg-info/SOURCES.txt +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker.egg-info/dependency_links.txt +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker.egg-info/requires.txt +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/offtracker.egg-info/top_level.txt +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_analysis.py +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_candidates.py +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_config.py +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_init.py +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_plot.py +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_qc.py +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/setup.cfg +0 -0
- {offtracker-2.13.1 → offtracker-2.13.2}/setup.py +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
__version__ = "2.13.1"
|
|
1
|
+
__version__ = "2.13.2"
|
|
2
2
|
# 2023.08.11. v1.1.0 adding a option for not normalizing the bw file
|
|
3
3
|
# 2023.10.26. v1.9.0 prerelease for v2.0
|
|
4
4
|
# 2023.10.27. v2.0.0 大更新,还没微调
|
|
@@ -43,4 +43,5 @@ __version__ = "2.13.1"
|
|
|
43
43
|
# 2025.07.04. v2.12.2 新增 region_index 标记区域,用于更好的去重
|
|
44
44
|
# 2025.07.18. v2.12.3 新增QC自动避免重复读取 trimmed fastq files
|
|
45
45
|
# 2025.08.08. v2.13.0 测试 local realign 功能
|
|
46
|
-
# 2025.08.09. v2.13.1 测试 correction 功能
|
|
46
|
+
# 2025.08.09. v2.13.1 测试 correction 功能
|
|
47
|
+
# 2025.08.09. v2.13.2 chromap + trim 参数
|
|
@@ -48,7 +48,7 @@ rule chromap:
|
|
|
48
48
|
temp(os.path.join(_output_dir,"{sample}.chromapx.bed"))
|
|
49
49
|
shell:
|
|
50
50
|
"""
|
|
51
|
-
chromap -l 3000 --low-mem --BED --remove-pcr-duplicates \
|
|
51
|
+
chromap -l 3000 --low-mem --BED --remove-pcr-duplicates --trim-adapters \
|
|
52
52
|
--min-read-length 10 --allocate-multi-mappings \
|
|
53
53
|
-x {params.index} -r {params.fasta} -t {threads} -1 {input.R1} -2 {input.R2} -o {output}
|
|
54
54
|
"""
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
2
3
|
|
|
3
4
|
import polars as pl
|
|
4
5
|
import pandas as pd
|
|
@@ -35,7 +36,7 @@ def main():
|
|
|
35
36
|
parser.add_argument('--pam' , type=str, required=True, help='The protospacer adjacent motif' )
|
|
36
37
|
parser.add_argument('--pam_location', type=str, default='downstream', help='Upstream or downstream, default is downstream (Cas9)' )
|
|
37
38
|
# not used
|
|
38
|
-
parser.add_argument('--seqfolder' , type=str,
|
|
39
|
+
parser.add_argument('--seqfolder' , type=str, default='none', help='Actually not used in this script.Only in case you forget to remove this argument.')
|
|
39
40
|
|
|
40
41
|
args = parser.parse_args()
|
|
41
42
|
# 2025.08.08. 增加对阳性位点的 target_location 重比对功能,避免 blast 比对后的 realign 在更大范围内的存在不准确的情况
|
|
@@ -53,7 +54,7 @@ def main():
|
|
|
53
54
|
score_thresh = args.score
|
|
54
55
|
binsize = args.binsize
|
|
55
56
|
flank_max = args.flank_max
|
|
56
|
-
flank_regions = args.flank_regions
|
|
57
|
+
flank_regions = args.flank_regions # 如果 analysis 时修改了这个参数没有写 1000 的话会出bug,暂时懒得改了
|
|
57
58
|
smooth_times = args.smooth
|
|
58
59
|
window_size = args.window
|
|
59
60
|
seq_score_power = args.SeqScorePower
|
|
@@ -139,8 +140,8 @@ def main():
|
|
|
139
140
|
ctr_sample_files = all_sample_files[bool_ctr]
|
|
140
141
|
exp_sample_names = all_sample_names[bool_exp]
|
|
141
142
|
ctr_sample_names = all_sample_names[bool_ctr]
|
|
142
|
-
selected_sample_files = pd.concat([exp_sample_files,ctr_sample_files])
|
|
143
|
-
selected_sample_names = pd.concat([exp_sample_names,ctr_sample_names]) # no use
|
|
143
|
+
# selected_sample_files = pd.concat([exp_sample_files,ctr_sample_files])
|
|
144
|
+
# selected_sample_names = pd.concat([exp_sample_names,ctr_sample_names]) # no use
|
|
144
145
|
|
|
145
146
|
|
|
146
147
|
|
|
@@ -154,8 +155,17 @@ def main():
|
|
|
154
155
|
PAM = args.pam
|
|
155
156
|
PAM_loc = args.pam_location
|
|
156
157
|
# read result
|
|
157
|
-
dp_result = pl.read_csv(f'
|
|
158
|
-
|
|
158
|
+
dp_result = pl.read_csv(f'./temp/df_result_{outname}.csv')
|
|
159
|
+
# negative for next section
|
|
160
|
+
bool_fdr_bkg = dp_result['fdr']>fdr_thresh
|
|
161
|
+
bool_score_bkg = dp_result['track_score']<score_thresh
|
|
162
|
+
dp_result_bkg = dp_result.filter(bool_fdr_bkg & bool_score_bkg)
|
|
163
|
+
# positive
|
|
164
|
+
bool_fdr = pl.col('fdr')<=fdr_thresh
|
|
165
|
+
bool_score = pl.col('track_score')>=score_thresh
|
|
166
|
+
dp_result = dp_result.filter(bool_fdr & bool_score)
|
|
167
|
+
# bdg
|
|
168
|
+
dp_bdg = pl.read_csv(exp_sample_files.iloc[0], separator='\t', has_header=False,
|
|
159
169
|
schema_overrides={'chr':pl.String,'start':pl.Int32,'end':pl.Int32,'residual':pl.Float32})
|
|
160
170
|
# check and realign
|
|
161
171
|
bool_left_neg=(dp_result['exp_L_neg_1000']<-5)&(dp_result['exp_R_neg_1000']==0)
|
|
@@ -163,11 +173,13 @@ def main():
|
|
|
163
173
|
list_good_result = []
|
|
164
174
|
list_bad_left = []
|
|
165
175
|
list_bad_right = []
|
|
176
|
+
n_left_for_correct = 0
|
|
177
|
+
n_right_for_correct = 0
|
|
166
178
|
for a_left_bool, a_right_bool, a_row in zip(bool_left_neg, bool_right_neg, dp_result.iter_rows(named=True)):
|
|
167
179
|
if a_left_bool & a_right_bool:
|
|
168
180
|
raise ValueError('abnormal on both left and right')
|
|
169
181
|
if a_left_bool:
|
|
170
|
-
|
|
182
|
+
n_left_for_correct += 1
|
|
171
183
|
loc_shift_left = a_row['chr'] + ':' + str(a_row['st']-1000) + '-' + str(a_row['ed']-20)
|
|
172
184
|
region_index = a_row['region_index']
|
|
173
185
|
dp_bdg_chr = dp_bdg.filter(pl.col('chr') == a_row['chr'])
|
|
@@ -175,7 +187,7 @@ def main():
|
|
|
175
187
|
sr_candidate.loc['region_index'] = region_index
|
|
176
188
|
list_bad_left.append(sr_candidate)
|
|
177
189
|
elif a_right_bool:
|
|
178
|
-
|
|
190
|
+
n_right_for_correct += 1
|
|
179
191
|
loc_shift_right = a_row['chr'] + ':' + str(a_row['st']+20) + '-' + str(a_row['ed']+1000)
|
|
180
192
|
region_index = a_row['region_index']
|
|
181
193
|
dp_bdg_chr = dp_bdg.filter(pl.col('chr') == a_row['chr'])
|
|
@@ -188,17 +200,50 @@ def main():
|
|
|
188
200
|
df_cand_left = pd.DataFrame(list_bad_left)
|
|
189
201
|
df_cand_right = pd.DataFrame(list_bad_right)
|
|
190
202
|
df_cand_realign = pd.concat([df_cand_left, df_cand_right])
|
|
203
|
+
if len(df_cand_realign) == 0:
|
|
204
|
+
print('No candidate is found for realignment.')
|
|
205
|
+
return 'finished'
|
|
191
206
|
|
|
207
|
+
# 情况判断
|
|
208
|
+
n_success_realign = sum(df_cand_realign['realign']=='success')
|
|
209
|
+
n_fail_realign = sum(df_cand_realign['realign']!='success')
|
|
210
|
+
if (n_success_realign == 0) and (n_fail_realign > 0):
|
|
211
|
+
print(f'{n_fail_realign} candidates are found for realignment, but all failed.')
|
|
212
|
+
return 'finished'
|
|
213
|
+
elif (n_success_realign > 0) and (n_fail_realign > 0):
|
|
214
|
+
print(f'{n_success_realign} candidates succeeded, and {n_fail_realign} candidates failed.')
|
|
215
|
+
else:
|
|
216
|
+
print(f'{n_success_realign} candidates succeeded.')
|
|
217
|
+
|
|
218
|
+
df_cand_realign = df_cand_realign[df_cand_realign['realign']=='success']
|
|
192
219
|
seqfile = rf'correction_df_candidate_{outname}_realign.csv'
|
|
193
220
|
df_cand_realign.to_csv(seqfile)
|
|
194
221
|
|
|
195
222
|
# run offtracker_analysis with check_loc mode
|
|
196
223
|
running_log = rf'correction_analysis_{outname}.log'
|
|
224
|
+
# list 转空格分割参数
|
|
225
|
+
if isinstance(pattern_exp, list):
|
|
226
|
+
param_pattern_exp = ' '.join(pattern_exp)
|
|
227
|
+
else:
|
|
228
|
+
param_pattern_exp = pattern_exp
|
|
229
|
+
if isinstance(pattern_ctr, list):
|
|
230
|
+
param_pattern_ctr = ' '.join(pattern_ctr)
|
|
231
|
+
else:
|
|
232
|
+
param_pattern_ctr = pattern_ctr
|
|
233
|
+
if isinstance(flank_regions, list):
|
|
234
|
+
param_flank_regions = ' '.join([str(x) for x in flank_regions])
|
|
235
|
+
else:
|
|
236
|
+
param_flank_regions = flank_regions
|
|
237
|
+
if isinstance(folders, list):
|
|
238
|
+
param_folders = ' '.join([str(x) for x in folders])
|
|
239
|
+
else:
|
|
240
|
+
param_folders = folders
|
|
241
|
+
|
|
197
242
|
with open(running_log, "w+") as running_log:
|
|
198
243
|
command = f'offtracker_analysis.py -t {args.thread} -g {args.genome} --seqfile {seqfile} --name {sgRNA_name} \
|
|
199
|
-
--exp {
|
|
244
|
+
--exp {param_pattern_exp} --control {param_pattern_ctr} --outname {outname}_loc_correction -f {param_folders} -o {outdir} \
|
|
200
245
|
--fdr {fdr_thresh} --window {window_size} --smooth {smooth_times} --SeqScorePower {seq_score_power} \
|
|
201
|
-
--score {score_thresh} --binsize {binsize} --flank_max {flank_max} --flank_regions {
|
|
246
|
+
--score {score_thresh} --binsize {binsize} --flank_max {flank_max} --flank_regions {param_flank_regions} --CtrClip {ctr_clip} \
|
|
202
247
|
--check_loc'
|
|
203
248
|
command2 = shlex.split('bash -c "{}"'.format(command))
|
|
204
249
|
process_1 = subprocess.Popen(command2, stdout=running_log, stderr=subprocess.STDOUT )
|
|
@@ -213,10 +258,7 @@ def main():
|
|
|
213
258
|
#######################
|
|
214
259
|
## recalculate score ##
|
|
215
260
|
#######################
|
|
216
|
-
|
|
217
|
-
bool_fdr_bkg = dp_result_bkg['fdr']>fdr_thresh
|
|
218
|
-
bool_score_bkg = dp_result_bkg['track_score']<score_thresh
|
|
219
|
-
dp_result_bkg = dp_result_bkg.filter(bool_fdr_bkg & bool_score_bkg)
|
|
261
|
+
|
|
220
262
|
dp_result_realign = pl.read_csv(f'./temp/df_result_{outname}_loc_correction.csv')
|
|
221
263
|
|
|
222
264
|
# 兼容旧版输出列名
|
|
@@ -256,7 +298,7 @@ def main():
|
|
|
256
298
|
# ouput Offtracker result
|
|
257
299
|
bool_fdr = pl.col('fdr')<=fdr_thresh
|
|
258
300
|
bool_score = pl.col('track_score')>=score_thresh
|
|
259
|
-
dp_output = dp_result_new.filter(bool_fdr|bool_score)
|
|
301
|
+
dp_output = dp_result_new.filter(bool_fdr|bool_score)
|
|
260
302
|
if pattern_ctr != 'none':
|
|
261
303
|
dp_output = dp_output[['target_location', 'best_strand','best_target','deletion','insertion','mismatch',
|
|
262
304
|
'exp_L_length', 'exp_R_length','ctr_L_length','ctr_R_length','L_length','R_length','signal_length',
|
|
@@ -271,7 +313,7 @@ def main():
|
|
|
271
313
|
dp_output.columns = ['target_location', 'strand', 'target', 'deletion', 'insertion', 'mismatch',
|
|
272
314
|
'L_length', 'R_length','signal_length',
|
|
273
315
|
'seq_score', 'track_score', 'log2_track_score','FDR', 'rank']
|
|
274
|
-
|
|
316
|
+
dp_output.write_csv(f'Offtracker_result_{outname}.csv')
|
|
275
317
|
|
|
276
318
|
return 'correction finished'
|
|
277
319
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/offtracker_blacklist_hg38.merged.bed
RENAMED
|
File without changes
|
{offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/offtracker_blacklist_mm10.merged.bed
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|