offtracker 2.13.1__zip → 2.14.0__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {offtracker-2.13.1 → offtracker-2.14.0}/PKG-INFO +18 -4
  2. offtracker-2.13.1/offtracker.egg-info/PKG-INFO → offtracker-2.14.0/README.md +261 -259
  3. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/_version.py +4 -2
  4. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/snakefile/Snakefile_offtracker.smk +1 -1
  5. offtracker-2.13.1/README.md → offtracker-2.14.0/offtracker.egg-info/PKG-INFO +273 -247
  6. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker.egg-info/requires.txt +1 -0
  7. {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_analysis.py +18 -0
  8. {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_candidates.py +14 -0
  9. {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_config.py +37 -3
  10. {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_correction.py +59 -17
  11. {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_qc.py +15 -2
  12. {offtracker-2.13.1 → offtracker-2.14.0}/setup.py +1 -1
  13. {offtracker-2.13.1 → offtracker-2.14.0}/LICENSE.txt +0 -0
  14. {offtracker-2.13.1 → offtracker-2.14.0}/MANIFEST.in +0 -0
  15. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/X_offplot.py +0 -0
  16. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/X_offtracker.py +0 -0
  17. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/X_sequence.py +0 -0
  18. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/__init__.py +0 -0
  19. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/snakefile/Snakefile_QC.smk +0 -0
  20. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/1.1_bed2fr.py +0 -0
  21. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/1.3_bdg_normalize_v4.0.py +0 -0
  22. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/bedGraphToBigWig +0 -0
  23. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/hg38.chrom.sizes +0 -0
  24. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/mm10.chrom.sizes +0 -0
  25. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/offtracker_blacklist_hg38.merged.bed +0 -0
  26. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker/utility/offtracker_blacklist_mm10.merged.bed +0 -0
  27. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker.egg-info/SOURCES.txt +0 -0
  28. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker.egg-info/dependency_links.txt +0 -0
  29. {offtracker-2.13.1 → offtracker-2.14.0}/offtracker.egg-info/top_level.txt +0 -0
  30. {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_init.py +0 -0
  31. {offtracker-2.13.1 → offtracker-2.14.0}/scripts/offtracker_plot.py +0 -0
  32. {offtracker-2.13.1 → offtracker-2.14.0}/setup.cfg +0 -0
@@ -4,6 +4,7 @@
4
4
  # 2023.08.11. adding a option for not normalizing the bw file
5
5
  # 2025.05.22. refine the structure
6
6
  # 2025.06.05. 增加 ignore_chr 选项,默认只取 common chromosomes,用于 1.1_bed2fr.py
7
+ # 2025.10.05. 添加 threads 监测,并添加互动模式 --cpu_help
7
8
 
8
9
  import argparse
9
10
  import os, glob, yaml
@@ -36,8 +37,10 @@ def main():
36
37
  parser.add_argument('--blacklist' , type=str, default='same', help='Blacklist of genome regions in bed format. "none" for no filter')
37
38
  parser.add_argument('--binsize' , type=str, default=100, help='Bin size for calculating bw residue')
38
39
  parser.add_argument('--normalize' , type=str, default='True', help='Whether to normalize the BigWig file. "True" or "False"')
39
- parser.add_argument('--ignore_chr' , action='store_true', help='If not set, only chr1-chr22, chrX, chrY, chrM will be analyzed.')
40
-
40
+ parser.add_argument('--ignore_chr' , action='store_true', help='If not set, only chr1-chr22, chrX, chrY, chrM will be analyzed.')
41
+ parser.add_argument('--cpu_help' , action='store_true', help='Interactive mode to recommend the number of threads and cores according to available memory and CPUs.'
42
+ '-t/--thread will be reset to the recommended value in this mode.'
43
+ )
41
44
 
42
45
  args = parser.parse_args()
43
46
 
@@ -74,6 +77,37 @@ def main():
74
77
 
75
78
  assert not isinstance(sample_names, str), 'No fastq file is detected!'
76
79
 
80
+
81
+ #####################
82
+ # threads 监测和推荐 #
83
+ #####################
84
+ import psutil
85
+ if args.cpu_help:
86
+ cpu_count_total = psutil.cpu_count(logical=True) # 逻辑 CPU 总数(包括超线程)
87
+ memory = psutil.virtual_memory()
88
+ memory_available = round(memory.available/1024/1024/1024, 2) # 可用内存 GB
89
+ print('Total available memory:', memory_available, 'GB')
90
+ print('Total CPU threads:', cpu_count_total)
91
+ n_sample = len(sample_names)
92
+ print('Total samples:', n_sample)
93
+ # 用户输入分配的最大内存和CPU线程数
94
+ max_memory_gb = float(input(f"Please input the maximum memory for the program (GB): 25 - {memory_available}"))
95
+ max_cpu_threads = int(input(f"Please input the maximum CPU threads for the program: 1 - {cpu_count_total}"))
96
+ assert (max_memory_gb < memory_available)&(max_memory_gb >= 25), f'max memory must be < available memory ({memory_available} GB) and >= 25 GB, current input: {max_memory_gb} GB'
97
+ assert (max_cpu_threads <= cpu_count_total)&(max_cpu_threads >= 1), f'max cpu threads must be <= total cpu threads ({cpu_count_total}) and >= 1, current input: {max_cpu_threads}'
98
+ # 计算推荐的 cpu 参数
99
+ max_task = min(int(max(max_memory_gb,30)/30), n_sample)
100
+ max_cpu_per_task = int(max_cpu_threads/max_task)
101
+ total_cpu = max_task*max_cpu_per_task
102
+
103
+ print('Assigning', max_cpu_per_task, f'CPU threads to each task. (i.e., -t {max_cpu_per_task})')
104
+ print('Number of parallel tasks:', max_task)
105
+ print(f'Please specify "--cores {total_cpu}" when formally running snakemake.')
106
+
107
+ n_threads = max_cpu_per_task
108
+ else:
109
+ n_threads = args.thread
110
+
77
111
  dict_yaml = {
78
112
  # fastq 信息
79
113
  'files_R1':dict(zip(sample_names,files_R1)),
@@ -82,7 +116,7 @@ def main():
82
116
  'input_dir':args.folder,
83
117
  'output_dir':args.outdir,
84
118
  # 运行参数
85
- 'thread':args.thread,
119
+ 'thread':n_threads,
86
120
  'index':args.index,
87
121
  'fasta':args.ref,
88
122
  'binsize':args.binsize,
@@ -1,4 +1,5 @@
1
-
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
2
3
 
3
4
  import polars as pl
4
5
  import pandas as pd
@@ -35,7 +36,7 @@ def main():
35
36
  parser.add_argument('--pam' , type=str, required=True, help='The protospacer adjacent motif' )
36
37
  parser.add_argument('--pam_location', type=str, default='downstream', help='Upstream or downstream, default is downstream (Cas9)' )
37
38
  # not used
38
- parser.add_argument('--seqfolder' , type=str, required=True, help='Actually not used in this script.Only in case you forget to remove this argument.')
39
+ parser.add_argument('--seqfolder' , type=str, default='none', help='Actually not used in this script.Only in case you forget to remove this argument.')
39
40
 
40
41
  args = parser.parse_args()
41
42
  # 2025.08.08. 增加对阳性位点的 target_location 重比对功能,避免 blast 比对后的 realign 在更大范围内的存在不准确的情况
@@ -53,7 +54,7 @@ def main():
53
54
  score_thresh = args.score
54
55
  binsize = args.binsize
55
56
  flank_max = args.flank_max
56
- flank_regions = args.flank_regions
57
+ flank_regions = args.flank_regions # 如果 analysis 时修改了这个参数没有写 1000 的话会出bug,暂时懒得改了
57
58
  smooth_times = args.smooth
58
59
  window_size = args.window
59
60
  seq_score_power = args.SeqScorePower
@@ -139,8 +140,8 @@ def main():
139
140
  ctr_sample_files = all_sample_files[bool_ctr]
140
141
  exp_sample_names = all_sample_names[bool_exp]
141
142
  ctr_sample_names = all_sample_names[bool_ctr]
142
- selected_sample_files = pd.concat([exp_sample_files,ctr_sample_files])
143
- selected_sample_names = pd.concat([exp_sample_names,ctr_sample_names]) # no use
143
+ # selected_sample_files = pd.concat([exp_sample_files,ctr_sample_files])
144
+ # selected_sample_names = pd.concat([exp_sample_names,ctr_sample_names]) # no use
144
145
 
145
146
 
146
147
 
@@ -154,8 +155,17 @@ def main():
154
155
  PAM = args.pam
155
156
  PAM_loc = args.pam_location
156
157
  # read result
157
- dp_result = pl.read_csv(f'Offtracker_result_{outname}.csv')
158
- dp_bdg = pl.read_parquet(selected_sample_files[0], separator='\t', has_header=False,
158
+ dp_result = pl.read_csv(f'./temp/df_result_{outname}.csv')
159
+ # negative for next section
160
+ bool_fdr_bkg = dp_result['fdr']>fdr_thresh
161
+ bool_score_bkg = dp_result['track_score']<score_thresh
162
+ dp_result_bkg = dp_result.filter(bool_fdr_bkg & bool_score_bkg)
163
+ # positive
164
+ bool_fdr = pl.col('fdr')<=fdr_thresh
165
+ bool_score = pl.col('track_score')>=score_thresh
166
+ dp_result = dp_result.filter(bool_fdr & bool_score)
167
+ # bdg
168
+ dp_bdg = pl.read_csv(exp_sample_files.iloc[0], separator='\t', has_header=False,
159
169
  schema_overrides={'chr':pl.String,'start':pl.Int32,'end':pl.Int32,'residual':pl.Float32})
160
170
  # check and realign
161
171
  bool_left_neg=(dp_result['exp_L_neg_1000']<-5)&(dp_result['exp_R_neg_1000']==0)
@@ -163,11 +173,13 @@ def main():
163
173
  list_good_result = []
164
174
  list_bad_left = []
165
175
  list_bad_right = []
176
+ n_left_for_correct = 0
177
+ n_right_for_correct = 0
166
178
  for a_left_bool, a_right_bool, a_row in zip(bool_left_neg, bool_right_neg, dp_result.iter_rows(named=True)):
167
179
  if a_left_bool & a_right_bool:
168
180
  raise ValueError('abnormal on both left and right')
169
181
  if a_left_bool:
170
- print('left')
182
+ n_left_for_correct += 1
171
183
  loc_shift_left = a_row['chr'] + ':' + str(a_row['st']-1000) + '-' + str(a_row['ed']-20)
172
184
  region_index = a_row['region_index']
173
185
  dp_bdg_chr = dp_bdg.filter(pl.col('chr') == a_row['chr'])
@@ -175,7 +187,7 @@ def main():
175
187
  sr_candidate.loc['region_index'] = region_index
176
188
  list_bad_left.append(sr_candidate)
177
189
  elif a_right_bool:
178
- print('right')
190
+ n_right_for_correct += 1
179
191
  loc_shift_right = a_row['chr'] + ':' + str(a_row['st']+20) + '-' + str(a_row['ed']+1000)
180
192
  region_index = a_row['region_index']
181
193
  dp_bdg_chr = dp_bdg.filter(pl.col('chr') == a_row['chr'])
@@ -188,17 +200,50 @@ def main():
188
200
  df_cand_left = pd.DataFrame(list_bad_left)
189
201
  df_cand_right = pd.DataFrame(list_bad_right)
190
202
  df_cand_realign = pd.concat([df_cand_left, df_cand_right])
203
+ if len(df_cand_realign) == 0:
204
+ print('No candidate is found for realignment.')
205
+ return 'finished'
191
206
 
207
+ # 情况判断
208
+ n_success_realign = sum(df_cand_realign['realign']=='success')
209
+ n_fail_realign = sum(df_cand_realign['realign']!='success')
210
+ if (n_success_realign == 0) and (n_fail_realign > 0):
211
+ print(f'{n_fail_realign} candidates are found for realignment, but all failed.')
212
+ return 'finished'
213
+ elif (n_success_realign > 0) and (n_fail_realign > 0):
214
+ print(f'{n_success_realign} candidates succeeded, and {n_fail_realign} candidates failed.')
215
+ else:
216
+ print(f'{n_success_realign} candidates succeeded.')
217
+
218
+ df_cand_realign = df_cand_realign[df_cand_realign['realign']=='success']
192
219
  seqfile = rf'correction_df_candidate_{outname}_realign.csv'
193
220
  df_cand_realign.to_csv(seqfile)
194
221
 
195
222
  # run offtracker_analysis with check_loc mode
196
223
  running_log = rf'correction_analysis_{outname}.log'
224
+ # list 转空格分割参数
225
+ if isinstance(pattern_exp, list):
226
+ param_pattern_exp = ' '.join(pattern_exp)
227
+ else:
228
+ param_pattern_exp = pattern_exp
229
+ if isinstance(pattern_ctr, list):
230
+ param_pattern_ctr = ' '.join(pattern_ctr)
231
+ else:
232
+ param_pattern_ctr = pattern_ctr
233
+ if isinstance(flank_regions, list):
234
+ param_flank_regions = ' '.join([str(x) for x in flank_regions])
235
+ else:
236
+ param_flank_regions = flank_regions
237
+ if isinstance(folders, list):
238
+ param_folders = ' '.join([str(x) for x in folders])
239
+ else:
240
+ param_folders = folders
241
+
197
242
  with open(running_log, "w+") as running_log:
198
243
  command = f'offtracker_analysis.py -t {args.thread} -g {args.genome} --seqfile {seqfile} --name {sgRNA_name} \
199
- --exp {pattern_exp} --control {pattern_ctr} --outname {outname}_loc_correction -f {folders} -o {outdir} \
244
+ --exp {param_pattern_exp} --control {param_pattern_ctr} --outname {outname}_loc_correction -f {param_folders} -o {outdir} \
200
245
  --fdr {fdr_thresh} --window {window_size} --smooth {smooth_times} --SeqScorePower {seq_score_power} \
201
- --score {score_thresh} --binsize {binsize} --flank_max {flank_max} --flank_regions {flank_regions} --CtrClip {ctr_clip} \
246
+ --score {score_thresh} --binsize {binsize} --flank_max {flank_max} --flank_regions {param_flank_regions} --CtrClip {ctr_clip} \
202
247
  --check_loc'
203
248
  command2 = shlex.split('bash -c "{}"'.format(command))
204
249
  process_1 = subprocess.Popen(command2, stdout=running_log, stderr=subprocess.STDOUT )
@@ -213,10 +258,7 @@ def main():
213
258
  #######################
214
259
  ## recalculate score ##
215
260
  #######################
216
- dp_result_bkg = pl.read_csv(f'./temp/df_result_{outname}.csv')
217
- bool_fdr_bkg = dp_result_bkg['fdr']>fdr_thresh
218
- bool_score_bkg = dp_result_bkg['track_score']<score_thresh
219
- dp_result_bkg = dp_result_bkg.filter(bool_fdr_bkg & bool_score_bkg)
261
+
220
262
  dp_result_realign = pl.read_csv(f'./temp/df_result_{outname}_loc_correction.csv')
221
263
 
222
264
  # 兼容旧版输出列名
@@ -256,7 +298,7 @@ def main():
256
298
  # ouput Offtracker result
257
299
  bool_fdr = pl.col('fdr')<=fdr_thresh
258
300
  bool_score = pl.col('track_score')>=score_thresh
259
- dp_output = dp_result_new.filter(bool_fdr|bool_score).copy()
301
+ dp_output = dp_result_new.filter(bool_fdr|bool_score)
260
302
  if pattern_ctr != 'none':
261
303
  dp_output = dp_output[['target_location', 'best_strand','best_target','deletion','insertion','mismatch',
262
304
  'exp_L_length', 'exp_R_length','ctr_L_length','ctr_R_length','L_length','R_length','signal_length',
@@ -271,7 +313,7 @@ def main():
271
313
  dp_output.columns = ['target_location', 'strand', 'target', 'deletion', 'insertion', 'mismatch',
272
314
  'L_length', 'R_length','signal_length',
273
315
  'seq_score', 'track_score', 'log2_track_score','FDR', 'rank']
274
- dp_output.write_csv(f'Offtracker_result_{outname}.csv')
316
+ dp_output.write_csv(f'Offtracker_result_{outname}.csv')
275
317
 
276
318
  return 'correction finished'
277
319
 
@@ -1,7 +1,9 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- THIS_VERSION = '0.4.1'
4
+ THIS_VERSION = '0.4.2'
5
+
6
+ # 2025.10.05. 0.4.2. 添加 threads 监测
5
7
 
6
8
  import argparse
7
9
  import os, glob, yaml
@@ -50,6 +52,17 @@ def main():
50
52
 
51
53
  assert not isinstance(sample_names, str), 'No fastq file is detected!'
52
54
 
55
+ ################
56
+ # threads 监测 #
57
+ ################
58
+ import psutil
59
+ n_threads = args.thread
60
+ assert n_threads > 0, f'n_threads should be greater than 0, while {n_threads} is given.'
61
+ cpu_count_total = psutil.cpu_count(logical=True) # 逻辑 CPU 总数(包括超线程)
62
+ if n_threads > cpu_count_total:
63
+ n_threads = cpu_count_total-1
64
+ print(f'n_threads is reset to {n_threads} due to the total number of threads ({cpu_count_total}).')
65
+
53
66
  dict_yaml = {
54
67
  # fastq 信息
55
68
  'files_R1':dict(zip(sample_names,files_R1)),
@@ -58,7 +71,7 @@ def main():
58
71
  'input_dir':args.folder,
59
72
  'output_dir':args.outdir,
60
73
  # 运行参数
61
- 'thread':args.thread,
74
+ 'thread':n_threads,
62
75
  'utility_dir':utility_dir
63
76
  }
64
77
 
@@ -26,7 +26,7 @@ with open(os.path.join(here, package_folder, '_version.py'),'r',encoding='utf-8'
26
26
 
27
27
  # requirements
28
28
  REQUIRED = [
29
- 'pandas', 'polars>=1.19.0', 'numpy', 'biopython<=1.85', 'pybedtools', 'pyyaml',
29
+ 'pandas', 'polars>=1.19.0', 'numpy', 'biopython<=1.85', 'pybedtools', 'pyyaml', 'psutil'
30
30
  ]
31
31
  ## pybedtools may be not supported in Windows
32
32
 
File without changes
File without changes
File without changes