offtracker 2.13.1__zip → 2.13.2__zip

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (32)
  1. {offtracker-2.13.1/offtracker.egg-info → offtracker-2.13.2}/PKG-INFO +1 -1
  2. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/_version.py +3 -2
  3. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/snakefile/Snakefile_offtracker.smk +1 -1
  4. {offtracker-2.13.1 → offtracker-2.13.2/offtracker.egg-info}/PKG-INFO +1 -1
  5. {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_correction.py +59 -17
  6. {offtracker-2.13.1 → offtracker-2.13.2}/LICENSE.txt +0 -0
  7. {offtracker-2.13.1 → offtracker-2.13.2}/MANIFEST.in +0 -0
  8. {offtracker-2.13.1 → offtracker-2.13.2}/README.md +0 -0
  9. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/X_offplot.py +0 -0
  10. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/X_offtracker.py +0 -0
  11. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/X_sequence.py +0 -0
  12. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/__init__.py +0 -0
  13. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/snakefile/Snakefile_QC.smk +0 -0
  14. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/1.1_bed2fr.py +0 -0
  15. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/1.3_bdg_normalize_v4.0.py +0 -0
  16. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/bedGraphToBigWig +0 -0
  17. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/hg38.chrom.sizes +0 -0
  18. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/mm10.chrom.sizes +0 -0
  19. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/offtracker_blacklist_hg38.merged.bed +0 -0
  20. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker/utility/offtracker_blacklist_mm10.merged.bed +0 -0
  21. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker.egg-info/SOURCES.txt +0 -0
  22. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker.egg-info/dependency_links.txt +0 -0
  23. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker.egg-info/requires.txt +0 -0
  24. {offtracker-2.13.1 → offtracker-2.13.2}/offtracker.egg-info/top_level.txt +0 -0
  25. {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_analysis.py +0 -0
  26. {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_candidates.py +0 -0
  27. {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_config.py +0 -0
  28. {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_init.py +0 -0
  29. {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_plot.py +0 -0
  30. {offtracker-2.13.1 → offtracker-2.13.2}/scripts/offtracker_qc.py +0 -0
  31. {offtracker-2.13.1 → offtracker-2.13.2}/setup.cfg +0 -0
  32. {offtracker-2.13.1 → offtracker-2.13.2}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: offtracker
3
- Version: 2.13.1
3
+ Version: 2.13.2
4
4
  Summary: Tracking-seq data analysis
5
5
  Home-page: https://github.com/Lan-lab/offtracker
6
6
  Author: Runda Xu
@@ -1,4 +1,4 @@
1
- __version__ = "2.13.1"
1
+ __version__ = "2.13.2"
2
2
  # 2023.08.11. v1.1.0 adding a option for not normalizing the bw file
3
3
  # 2023.10.26. v1.9.0 prerelease for v2.0
4
4
  # 2023.10.27. v2.0.0 大更新,还没微调
@@ -43,4 +43,5 @@ __version__ = "2.13.1"
43
43
  # 2025.07.04. v2.12.2 新增 region_index 标记区域,用于更好的去重
44
44
  # 2025.07.18. v2.12.3 新增QC自动避免重复读取 trimmed fastq files
45
45
  # 2025.08.08. v2.13.0 测试 local realign 功能
46
- # 2025.08.09. v2.13.1 测试 correction 功能
46
+ # 2025.08.09. v2.13.1 测试 correction 功能
47
+ # 2025.08.09. v2.13.2 chromap + trim 参数
@@ -48,7 +48,7 @@ rule chromap:
48
48
  temp(os.path.join(_output_dir,"{sample}.chromapx.bed"))
49
49
  shell:
50
50
  """
51
- chromap -l 3000 --low-mem --BED --remove-pcr-duplicates \
51
+ chromap -l 3000 --low-mem --BED --remove-pcr-duplicates --trim-adapters \
52
52
  --min-read-length 10 --allocate-multi-mappings \
53
53
  -x {params.index} -r {params.fasta} -t {threads} -1 {input.R1} -2 {input.R2} -o {output}
54
54
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: offtracker
3
- Version: 2.13.1
3
+ Version: 2.13.2
4
4
  Summary: Tracking-seq data analysis
5
5
  Home-page: https://github.com/Lan-lab/offtracker
6
6
  Author: Runda Xu
@@ -1,4 +1,5 @@
1
-
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
2
3
 
3
4
  import polars as pl
4
5
  import pandas as pd
@@ -35,7 +36,7 @@ def main():
35
36
  parser.add_argument('--pam' , type=str, required=True, help='The protospacer adjacent motif' )
36
37
  parser.add_argument('--pam_location', type=str, default='downstream', help='Upstream or downstream, default is downstream (Cas9)' )
37
38
  # not used
38
- parser.add_argument('--seqfolder' , type=str, required=True, help='Actually not used in this script.Only in case you forget to remove this argument.')
39
+ parser.add_argument('--seqfolder' , type=str, default='none', help='Actually not used in this script.Only in case you forget to remove this argument.')
39
40
 
40
41
  args = parser.parse_args()
41
42
  # 2025.08.08. 增加对阳性位点的 target_location 重比对功能,避免 blast 比对后的 realign 在更大范围内的存在不准确的情况
@@ -53,7 +54,7 @@ def main():
53
54
  score_thresh = args.score
54
55
  binsize = args.binsize
55
56
  flank_max = args.flank_max
56
- flank_regions = args.flank_regions
57
+ flank_regions = args.flank_regions # 如果 analysis 时修改了这个参数没有写 1000 的话会出bug,暂时懒得改了
57
58
  smooth_times = args.smooth
58
59
  window_size = args.window
59
60
  seq_score_power = args.SeqScorePower
@@ -139,8 +140,8 @@ def main():
139
140
  ctr_sample_files = all_sample_files[bool_ctr]
140
141
  exp_sample_names = all_sample_names[bool_exp]
141
142
  ctr_sample_names = all_sample_names[bool_ctr]
142
- selected_sample_files = pd.concat([exp_sample_files,ctr_sample_files])
143
- selected_sample_names = pd.concat([exp_sample_names,ctr_sample_names]) # no use
143
+ # selected_sample_files = pd.concat([exp_sample_files,ctr_sample_files])
144
+ # selected_sample_names = pd.concat([exp_sample_names,ctr_sample_names]) # no use
144
145
 
145
146
 
146
147
 
@@ -154,8 +155,17 @@ def main():
154
155
  PAM = args.pam
155
156
  PAM_loc = args.pam_location
156
157
  # read result
157
- dp_result = pl.read_csv(f'Offtracker_result_{outname}.csv')
158
- dp_bdg = pl.read_parquet(selected_sample_files[0], separator='\t', has_header=False,
158
+ dp_result = pl.read_csv(f'./temp/df_result_{outname}.csv')
159
+ # negative for next section
160
+ bool_fdr_bkg = dp_result['fdr']>fdr_thresh
161
+ bool_score_bkg = dp_result['track_score']<score_thresh
162
+ dp_result_bkg = dp_result.filter(bool_fdr_bkg & bool_score_bkg)
163
+ # positive
164
+ bool_fdr = pl.col('fdr')<=fdr_thresh
165
+ bool_score = pl.col('track_score')>=score_thresh
166
+ dp_result = dp_result.filter(bool_fdr & bool_score)
167
+ # bdg
168
+ dp_bdg = pl.read_csv(exp_sample_files.iloc[0], separator='\t', has_header=False,
159
169
  schema_overrides={'chr':pl.String,'start':pl.Int32,'end':pl.Int32,'residual':pl.Float32})
160
170
  # check and realign
161
171
  bool_left_neg=(dp_result['exp_L_neg_1000']<-5)&(dp_result['exp_R_neg_1000']==0)
@@ -163,11 +173,13 @@ def main():
163
173
  list_good_result = []
164
174
  list_bad_left = []
165
175
  list_bad_right = []
176
+ n_left_for_correct = 0
177
+ n_right_for_correct = 0
166
178
  for a_left_bool, a_right_bool, a_row in zip(bool_left_neg, bool_right_neg, dp_result.iter_rows(named=True)):
167
179
  if a_left_bool & a_right_bool:
168
180
  raise ValueError('abnormal on both left and right')
169
181
  if a_left_bool:
170
- print('left')
182
+ n_left_for_correct += 1
171
183
  loc_shift_left = a_row['chr'] + ':' + str(a_row['st']-1000) + '-' + str(a_row['ed']-20)
172
184
  region_index = a_row['region_index']
173
185
  dp_bdg_chr = dp_bdg.filter(pl.col('chr') == a_row['chr'])
@@ -175,7 +187,7 @@ def main():
175
187
  sr_candidate.loc['region_index'] = region_index
176
188
  list_bad_left.append(sr_candidate)
177
189
  elif a_right_bool:
178
- print('right')
190
+ n_right_for_correct += 1
179
191
  loc_shift_right = a_row['chr'] + ':' + str(a_row['st']+20) + '-' + str(a_row['ed']+1000)
180
192
  region_index = a_row['region_index']
181
193
  dp_bdg_chr = dp_bdg.filter(pl.col('chr') == a_row['chr'])
@@ -188,17 +200,50 @@ def main():
188
200
  df_cand_left = pd.DataFrame(list_bad_left)
189
201
  df_cand_right = pd.DataFrame(list_bad_right)
190
202
  df_cand_realign = pd.concat([df_cand_left, df_cand_right])
203
+ if len(df_cand_realign) == 0:
204
+ print('No candidate is found for realignment.')
205
+ return 'finished'
191
206
 
207
+ # 情况判断
208
+ n_success_realign = sum(df_cand_realign['realign']=='success')
209
+ n_fail_realign = sum(df_cand_realign['realign']!='success')
210
+ if (n_success_realign == 0) and (n_fail_realign > 0):
211
+ print(f'{n_fail_realign} candidates are found for realignment, but all failed.')
212
+ return 'finished'
213
+ elif (n_success_realign > 0) and (n_fail_realign > 0):
214
+ print(f'{n_success_realign} candidates succeeded, and {n_fail_realign} candidates failed.')
215
+ else:
216
+ print(f'{n_success_realign} candidates succeeded.')
217
+
218
+ df_cand_realign = df_cand_realign[df_cand_realign['realign']=='success']
192
219
  seqfile = rf'correction_df_candidate_{outname}_realign.csv'
193
220
  df_cand_realign.to_csv(seqfile)
194
221
 
195
222
  # run offtracker_analysis with check_loc mode
196
223
  running_log = rf'correction_analysis_{outname}.log'
224
+ # list 转空格分割参数
225
+ if isinstance(pattern_exp, list):
226
+ param_pattern_exp = ' '.join(pattern_exp)
227
+ else:
228
+ param_pattern_exp = pattern_exp
229
+ if isinstance(pattern_ctr, list):
230
+ param_pattern_ctr = ' '.join(pattern_ctr)
231
+ else:
232
+ param_pattern_ctr = pattern_ctr
233
+ if isinstance(flank_regions, list):
234
+ param_flank_regions = ' '.join([str(x) for x in flank_regions])
235
+ else:
236
+ param_flank_regions = flank_regions
237
+ if isinstance(folders, list):
238
+ param_folders = ' '.join([str(x) for x in folders])
239
+ else:
240
+ param_folders = folders
241
+
197
242
  with open(running_log, "w+") as running_log:
198
243
  command = f'offtracker_analysis.py -t {args.thread} -g {args.genome} --seqfile {seqfile} --name {sgRNA_name} \
199
- --exp {pattern_exp} --control {pattern_ctr} --outname {outname}_loc_correction -f {folders} -o {outdir} \
244
+ --exp {param_pattern_exp} --control {param_pattern_ctr} --outname {outname}_loc_correction -f {param_folders} -o {outdir} \
200
245
  --fdr {fdr_thresh} --window {window_size} --smooth {smooth_times} --SeqScorePower {seq_score_power} \
201
- --score {score_thresh} --binsize {binsize} --flank_max {flank_max} --flank_regions {flank_regions} --CtrClip {ctr_clip} \
246
+ --score {score_thresh} --binsize {binsize} --flank_max {flank_max} --flank_regions {param_flank_regions} --CtrClip {ctr_clip} \
202
247
  --check_loc'
203
248
  command2 = shlex.split('bash -c "{}"'.format(command))
204
249
  process_1 = subprocess.Popen(command2, stdout=running_log, stderr=subprocess.STDOUT )
@@ -213,10 +258,7 @@ def main():
213
258
  #######################
214
259
  ## recalculate score ##
215
260
  #######################
216
- dp_result_bkg = pl.read_csv(f'./temp/df_result_{outname}.csv')
217
- bool_fdr_bkg = dp_result_bkg['fdr']>fdr_thresh
218
- bool_score_bkg = dp_result_bkg['track_score']<score_thresh
219
- dp_result_bkg = dp_result_bkg.filter(bool_fdr_bkg & bool_score_bkg)
261
+
220
262
  dp_result_realign = pl.read_csv(f'./temp/df_result_{outname}_loc_correction.csv')
221
263
 
222
264
  # 兼容旧版输出列名
@@ -256,7 +298,7 @@ def main():
256
298
  # ouput Offtracker result
257
299
  bool_fdr = pl.col('fdr')<=fdr_thresh
258
300
  bool_score = pl.col('track_score')>=score_thresh
259
- dp_output = dp_result_new.filter(bool_fdr|bool_score).copy()
301
+ dp_output = dp_result_new.filter(bool_fdr|bool_score)
260
302
  if pattern_ctr != 'none':
261
303
  dp_output = dp_output[['target_location', 'best_strand','best_target','deletion','insertion','mismatch',
262
304
  'exp_L_length', 'exp_R_length','ctr_L_length','ctr_R_length','L_length','R_length','signal_length',
@@ -271,7 +313,7 @@ def main():
271
313
  dp_output.columns = ['target_location', 'strand', 'target', 'deletion', 'insertion', 'mismatch',
272
314
  'L_length', 'R_length','signal_length',
273
315
  'seq_score', 'track_score', 'log2_track_score','FDR', 'rank']
274
- dp_output.write_csv(f'Offtracker_result_{outname}.csv')
316
+ dp_output.write_csv(f'Offtracker_result_{outname}.csv')
275
317
 
276
318
  return 'correction finished'
277
319
 
File without changes
File without changes
File without changes
File without changes
File without changes