offtracker 1.0.1__zip → 2.7.7__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {offtracker-1.0.1/offtracker.egg-info → offtracker-2.7.7}/PKG-INFO +13 -6
  2. {offtracker-1.0.1 → offtracker-2.7.7}/README.md +12 -5
  3. offtracker-2.7.7/offtracker/X_offplot.py +123 -0
  4. offtracker-2.7.7/offtracker/X_offtracker.py +338 -0
  5. offtracker-1.0.1/offtracker/X_general.py → offtracker-2.7.7/offtracker/X_sequence.py +18 -5
  6. {offtracker-1.0.1 → offtracker-2.7.7}/offtracker/__init__.py +1 -1
  7. offtracker-2.7.7/offtracker/_version.py +27 -0
  8. offtracker-2.7.7/offtracker/mapping/Snakefile_offtracker +245 -0
  9. offtracker-2.7.7/offtracker/mapping/offtracker_blacklist_hg38.merged.bed +3846 -0
  10. offtracker-2.7.7/offtracker/mapping/offtracker_blacklist_mm10.merged.bed +5827 -0
  11. {offtracker-1.0.1 → offtracker-2.7.7/offtracker.egg-info}/PKG-INFO +13 -6
  12. {offtracker-1.0.1 → offtracker-2.7.7}/offtracker.egg-info/SOURCES.txt +4 -3
  13. offtracker-2.7.7/scripts/offtracker_analysis.py +369 -0
  14. {offtracker-1.0.1 → offtracker-2.7.7}/scripts/offtracker_candidates.py +59 -101
  15. {offtracker-1.0.1 → offtracker-2.7.7}/scripts/offtracker_config.py +15 -10
  16. offtracker-1.0.1/offtracker/X_analysis.py +0 -332
  17. offtracker-1.0.1/offtracker/_version.py +0 -1
  18. offtracker-1.0.1/offtracker/mapping/Snakefile_Trackseq +0 -193
  19. offtracker-1.0.1/offtracker/mapping/offtracker_blacklist_hg38.merged.bed +0 -22228
  20. offtracker-1.0.1/offtracker/mapping/offtracker_blacklist_mm10.merged.bed +0 -9347
  21. offtracker-1.0.1/scripts/offtracker_analysis.py +0 -407
  22. {offtracker-1.0.1 → offtracker-2.7.7}/LICENSE.txt +0 -0
  23. {offtracker-1.0.1 → offtracker-2.7.7}/MANIFEST.in +0 -0
  24. {offtracker-1.0.1 → offtracker-2.7.7}/offtracker/mapping/1.1_bed2fr_v4.5.py +0 -0
  25. {offtracker-1.0.1 → offtracker-2.7.7}/offtracker/mapping/1.3_bdg_normalize_v4.0.py +0 -0
  26. {offtracker-1.0.1 → offtracker-2.7.7}/offtracker/mapping/bedGraphToBigWig +0 -0
  27. {offtracker-1.0.1 → offtracker-2.7.7}/offtracker/mapping/hg38.chrom.sizes +0 -0
  28. {offtracker-1.0.1 → offtracker-2.7.7}/offtracker/mapping/mm10.chrom.sizes +0 -0
  29. {offtracker-1.0.1 → offtracker-2.7.7}/offtracker.egg-info/dependency_links.txt +0 -0
  30. {offtracker-1.0.1 → offtracker-2.7.7}/offtracker.egg-info/requires.txt +0 -0
  31. {offtracker-1.0.1 → offtracker-2.7.7}/offtracker.egg-info/top_level.txt +0 -0
  32. {offtracker-1.0.1 → offtracker-2.7.7}/setup.cfg +0 -0
  33. {offtracker-1.0.1 → offtracker-2.7.7}/setup.py +0 -0
@@ -1,14 +1,17 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
+ # 2023.10.27. v2.0: 2.0以target_location midpoint为中心,因此取消 pct 计算
5
+ # 2023.12.06. v2.1: 2.1增加 cleavage_site 推测, 修正 deletion 错位, 以 cleavage_site 为中心
4
6
  import os,sys,re,time
7
+ from itertools import product
5
8
 
6
9
  if sys.version_info < (3,0):
7
10
  import platform
8
11
  raise Exception(f'python3 is needed, while running {platform.python_version()} now')
9
12
 
10
13
  import offtracker
11
- from offtracker.X_general import *
14
+ import offtracker.X_sequence as xseq
12
15
  script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
13
16
  script_folder= os.path.join(script_dir, 'mapping')
14
17
 
@@ -28,10 +31,9 @@ def main():
28
31
  parser.add_argument('-b','--blastdb', type=str, required=True, help='blast database')
29
32
  parser.add_argument('-o','--outdir' , type=str, required=True, help='The output folder')
30
33
  parser.add_argument('-g','--genome' , type=str, default='hg38', help='File of chromosome sizes, or "hg38", "mm10" ')
31
- parser.add_argument('-t','--thread' , type=int, default=4, help='Number of threads to be used')
34
+ parser.add_argument('-t','--thread' , type=int, default=4, help='Number of threads for parallel computing')
32
35
  parser.add_argument('--quick_mode' , action='store_true', help='BLAST faster but less candidates.')
33
- parser.add_argument('--regions' , type=str, default='auto', nargs='+', help='Regions around candidate sites.' )
34
-
36
+
35
37
  args = parser.parse_args()
36
38
 
37
39
 
@@ -50,19 +52,12 @@ def main():
50
52
  dir_ref_fa = args.ref
51
53
  blast_db = args.blastdb
52
54
  quick_mode = args.quick_mode
53
- if args.regions == 'auto':
54
- regions = [500, 1000, 2000, 3000]
55
- else:
56
- regions = list(map(int, args.regions))
57
- common_chr = pd.Series(['chr']*23).str[:] + pd.Series(range(23)).astype(str).str[:]
58
- common_chr = pd.concat([common_chr, pd.Series(['chrX','chrY'])]).to_numpy()
59
55
 
60
56
  # parameters for alignment
61
57
  half_width = 100
62
58
  pct_params = 1.0
63
59
  frag_len= half_width*2
64
- location_len = regions[-1]
65
- dir_df_alignment = os.path.join(dir_output, f'df_alignment_{sgRNA_name}_{location_len}.csv')
60
+ dir_df_candidate = os.path.join(dir_output, f'df_candidate_{sgRNA_name}.csv')
66
61
 
67
62
 
68
63
  sgRNA_seq = sgRNA_seq.upper()
@@ -72,13 +67,13 @@ def main():
72
67
  dir_sgRNA_bed = os.path.join(dir_output, f'{sgRNA_name}_PAM.bed')
73
68
 
74
69
 
75
- possible_sgRNA_PAM = list(product([sgRNA_seq],possible_seq(PAM)))
70
+ possible_sgRNA_PAM = list(product([sgRNA_seq],xseq.possible_seq(PAM)))
76
71
  possible_sgRNA_PAM = [''.join(combination) for combination in possible_sgRNA_PAM]
77
72
  n_seq = len(possible_sgRNA_PAM)
78
73
 
79
74
  ID = pd.Series(['seq']*n_seq) + pd.Series(range(1,n_seq+1)).astype(str)
80
75
  df_sgRNA_PAM = pd.DataFrame({'ID':ID,'sequence':possible_sgRNA_PAM})
81
- write_fasta(df_sgRNA_PAM, dir_sgRNA_fasta)
76
+ xseq.write_fasta(df_sgRNA_PAM, dir_sgRNA_fasta)
82
77
 
83
78
 
84
79
 
@@ -95,7 +90,7 @@ def main():
95
90
  gapopen=4, gapextend=2, reward=2, word_size=5, dust='no', soft_masking=False)
96
91
  else:
97
92
  blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
98
- db=blast_db, evalue=100000,outfmt=6, num_threads=n_threads,
93
+ db=blast_db, evalue=10000,outfmt=6, num_threads=n_threads,
99
94
  gapopen=4, gapextend=2, reward=2, word_size=4, dust='no', soft_masking=False)
100
95
  print(f'BLAST for candidate off-target sites of {sgRNA_name}.')
101
96
  blastx_cline()
@@ -129,77 +124,28 @@ def main():
129
124
  blast_regions = blast_regions.reindex(columns = ['chr', 'st', 'ed' , 'query acc.', '% identity', 'alignment length', 'mismatches',
130
125
  'gap opens', 'q. start', 'q. end', 'evalue', 'bit score', 'reverse', 'location'] )
131
126
 
132
- # 输出 bed 用于后续 coverage 计算
127
+ # 输出 bed 用于后续 alignment score 计算
133
128
  blast_regions_bed = blast_regions[['chr','st','ed']]
134
- writebed(blast_regions_bed, dir_sgRNA_bed)
129
+ xseq.write_bed(blast_regions_bed, dir_sgRNA_bed)
135
130
  # 对 bed 进行排序但不合并
136
131
  a = pybedtools.BedTool(dir_sgRNA_bed)
137
132
  a.sort(g=dir_chrom_sizes).saveas( dir_sgRNA_bed )
138
133
  print(f'Output {sgRNA_name}_PAM.bed')
139
134
 
140
135
 
141
- ############################
142
- # Output candidate regions #
143
- ############################
144
-
145
- blast_regions_bed = X_readbed(dir_sgRNA_bed)
146
- blast_regions_bed = blast_regions_bed[blast_regions_bed['chr'].isin(common_chr)]
147
- blast_regions_bed['midpoint'] = ((blast_regions_bed['st'] + blast_regions_bed['ed'])/2).astype(int)
148
- blast_regions_bed = blast_regions_bed.drop_duplicates(subset=['chr','midpoint']).copy()
149
- for a_region in regions:
150
- candidate_region_left = blast_regions_bed.copy()
151
- candidate_region_left['ed'] = candidate_region_left['midpoint']
152
- candidate_region_left['st'] = candidate_region_left['midpoint']-a_region
153
- candidate_region_left.loc[candidate_region_left['st']<0,'st'] = 0
154
- # 储存并排序
155
- left_region =os.path.join(dir_output, f'{sgRNA_name}_candidate_left_{a_region}.bed')
156
- writebed(candidate_region_left.iloc[:,:3], left_region)
157
- a = pybedtools.BedTool(left_region)
158
- a.sort(g=dir_chrom_sizes).saveas( left_region )
159
-
160
- candidate_region_right = blast_regions_bed.copy()
161
- candidate_region_right['st'] = candidate_region_right['midpoint']
162
- candidate_region_right['ed'] = candidate_region_right['midpoint']+a_region
163
- # 储存并排序
164
- right_region = os.path.join(dir_output, f'{sgRNA_name}_candidate_right_{a_region}.bed')
165
- writebed(candidate_region_right.iloc[:,:3], right_region)
166
- a = pybedtools.BedTool(right_region)
167
- a.sort(g=dir_chrom_sizes).saveas( right_region )
168
-
169
- # background noise
170
- for i in range(1,4):
171
- candidate_region_left = blast_regions_bed.copy()
172
- candidate_region_left['ed'] = candidate_region_left['midpoint']-5000*i
173
- candidate_region_left['st'] = candidate_region_left['midpoint']-5000*(i+1)
174
- candidate_region_left.loc[candidate_region_left['st']<0,'st'] = 0
175
- candidate_region_left.loc[candidate_region_left['ed']<5000,'ed'] = 5000
176
- # 储存并排序
177
- left_region =os.path.join(dir_output, f'{sgRNA_name}_candidate_left_bkg{i}.bed')
178
- writebed(candidate_region_left.iloc[:,:3], left_region)
179
- a = pybedtools.BedTool(left_region)
180
- a.sort(g=dir_chrom_sizes).saveas( left_region )
181
-
182
- candidate_region_right = blast_regions_bed.copy()
183
- candidate_region_right['st'] = candidate_region_right['midpoint']+5000*i
184
- candidate_region_right['ed'] = candidate_region_right['midpoint']+5000*(i+1)
185
- # 储存并排序
186
- right_region = os.path.join(dir_output, f'{sgRNA_name}_candidate_right_bkg{i}.bed')
187
- writebed(candidate_region_right.iloc[:,:3], right_region)
188
- a = pybedtools.BedTool(right_region)
189
- a.sort(g=dir_chrom_sizes).saveas( right_region )
190
-
191
- print(f'Output candidate regions of {sgRNA_name}.')
192
-
193
136
  ###################
194
137
  # alignment score #
195
138
  ###################
196
- if os.path.isfile(dir_df_alignment):
197
- print(f'{dir_df_alignment} exists, skipped.')
139
+ if os.path.isfile(dir_df_candidate):
140
+ print(f'{dir_df_candidate} exists, skipped.')
198
141
  else:
199
142
  #########
200
143
  # 读取 blast bed
201
144
  #########
202
- bed_short = X_readbed(dir_sgRNA_bed)
145
+ common_chr = pd.Series(['chr']*23).str[:] + pd.Series(range(23)).astype(str).str[:]
146
+ common_chr = pd.concat([common_chr, pd.Series(['chrX','chrY'])]).to_numpy()
147
+
148
+ bed_short = xseq.X_readbed(dir_sgRNA_bed)
203
149
  bed_short = bed_short[bed_short['chr'].isin(common_chr)].copy()
204
150
  bed_short['midpoint'] = ((bed_short['st'] + bed_short['ed'])/2).astype(int)
205
151
  bed_short['st'] = bed_short['midpoint'] - half_width
@@ -212,7 +158,7 @@ def main():
212
158
  #########
213
159
 
214
160
  temp_bed = os.path.join(dir_output, 'temp.bed')
215
- writebed(bed_short.iloc[:,:3], temp_bed)
161
+ xseq.write_bed(bed_short.iloc[:,:3], temp_bed)
216
162
  a = pybedtools.BedTool(temp_bed)
217
163
  fasta = pybedtools.example_filename(dir_ref_fa)
218
164
  a = a.sequence(fi=fasta)
@@ -239,7 +185,7 @@ def main():
239
185
  mismatch_score = 0.01
240
186
  # 添加 PAM
241
187
  sgRNA_PAM_fw = sgRNA_seq + PAM
242
- sgRNA_PAM_rv = reverse_complement(sgRNA_PAM_fw)
188
+ sgRNA_PAM_rv = xseq.reverse_complement(sgRNA_PAM_fw)
243
189
 
244
190
  list_args_fw=[]
245
191
  list_args_rv=[]
@@ -249,38 +195,44 @@ def main():
249
195
  list_args_rv.append( [a_key, sgRNA_PAM_rv, seq, frag_len, DNA_matrix, mismatch_score] )
250
196
  st = time.time()
251
197
  with mp.Pool(n_threads) as p:
252
- list_align_forward = p.starmap(sgRNA_alignment, list_args_fw)
198
+ list_align_forward = p.starmap(xseq.sgRNA_alignment, list_args_fw)
253
199
  ed = time.time()
254
200
  print('align_forward:{:.2f}'.format(ed-st))
255
201
  st = time.time()
256
202
  with mp.Pool(n_threads) as p:
257
- list_align_reverse = p.starmap(sgRNA_alignment, list_args_rv)
203
+ list_align_reverse = p.starmap(xseq.sgRNA_alignment, list_args_rv)
258
204
  ed = time.time()
259
205
  print('align_reverse:{:.2f}'.format(ed-st))
260
206
  #
261
207
  df_align_forward = pd.DataFrame(list_align_forward, columns= ['fw_score','fw_pct','fw_target','fw_location','fw_deletion','fw_insertion','fw_mismatch'])
262
208
  df_align_reverse = pd.DataFrame(list_align_reverse, columns= ['rv_score','rv_pct','rv_target','rv_location','rv_deletion','rv_insertion','rv_mismatch'])
263
- df_align_reverse['rv_target'] = df_align_reverse['rv_target'].apply(reverse_complement)
264
- df_alignment = pd.concat([df_align_forward,df_align_reverse],axis=1)
265
- df_alignment['location'] = fasta.keys()
266
- df_alignment['alignment_score'] = df_alignment[['fw_score','rv_score']].max(axis=1)
267
- df_alignment['fw_score_2'] = df_alignment['fw_score']*(pct_params-df_alignment['fw_pct'].abs())
268
- df_alignment['rv_score_2'] = df_alignment['rv_score']*(pct_params-df_alignment['rv_pct'].abs())
269
- df_alignment['best_seq_score'] = df_alignment[['fw_score_2', 'rv_score_2']].max(axis=1)
270
- df_alignment['best_strand'] = df_alignment[['fw_score_2', 'rv_score_2']].idxmax(axis='columns').replace({'fw_score_2':'+', 'rv_score_2':'-'})
271
- df_alignment.loc[df_alignment['fw_score_2']==df_alignment['rv_score_2'],'best_strand']='equal_score'
272
-
209
+ df_align_reverse['rv_target'] = df_align_reverse['rv_target'].apply(xseq.reverse_complement)
210
+ df_candidate = pd.concat([df_align_forward,df_align_reverse],axis=1)
211
+ df_candidate['location'] = fasta.keys()
212
+ df_candidate['alignment_score'] = df_candidate[['fw_score','rv_score']].max(axis=1)
213
+ #df_candidate['fw_score_2'] = df_candidate['fw_score']*(pct_params-df_candidate['fw_pct'].abs())
214
+ #df_candidate['rv_score_2'] = df_candidate['rv_score']*(pct_params-df_candidate['rv_pct'].abs())
215
+ #df_candidate['best_seq_score'] = df_candidate[['fw_score_2', 'rv_score_2']].max(axis=1)
216
+ #df_candidate['best_strand'] = df_candidate[['fw_score_2', 'rv_score_2']].idxmax(axis='columns').replace({'fw_score_2':'+', 'rv_score_2':'-'})
217
+ #df_candidate.loc[df_candidate['fw_score_2']==df_candidate['rv_score_2'],'best_strand']='equal_score'
218
+ df_candidate['best_seq_score'] = df_candidate[['fw_score', 'rv_score']].max(axis=1)
219
+ df_candidate['best_strand'] = df_candidate[['fw_score', 'rv_score']].idxmax(axis='columns').replace({'fw_score':'+', 'rv_score':'-'})
220
+ df_candidate.loc[df_candidate['fw_score']==df_candidate['rv_score'],'best_strand']='equal_score'
221
+
273
222
  # GG check
223
+ # 2023.12.05 增加 cleavage_site 推测
274
224
  list_best_target = []
275
225
  list_best_location = []
226
+ list_cleavage_site = []
276
227
  list_delete = []
277
228
  list_insert = []
278
229
  list_mismat = []
279
230
  list_GG = []
280
- for a_row in df_alignment.iterrows():
231
+ for a_row in df_candidate.iterrows():
281
232
  if a_row[1]['best_strand']=='+':
282
233
  list_best_target.append(a_row[1]['fw_target'])
283
234
  list_best_location.append(a_row[1]['fw_location'])
235
+ list_cleavage_site.append(int(a_row[1]['fw_location'].split('-')[1]) - 6)
284
236
  list_delete.append(a_row[1]['fw_deletion'])
285
237
  list_insert.append(a_row[1]['fw_insertion'])
286
238
  list_mismat.append(a_row[1]['fw_mismatch'])
@@ -291,6 +243,7 @@ def main():
291
243
  elif a_row[1]['best_strand']=='-':
292
244
  list_best_target.append(a_row[1]['rv_target'])
293
245
  list_best_location.append(a_row[1]['rv_location'])
246
+ list_cleavage_site.append(int(a_row[1]['rv_location'].split('-')[0].split(':')[1]) + 5)
294
247
  list_delete.append(a_row[1]['rv_deletion'])
295
248
  list_insert.append(a_row[1]['rv_insertion'])
296
249
  list_mismat.append(a_row[1]['rv_mismatch'])
@@ -302,6 +255,7 @@ def main():
302
255
  if a_row[1]['fw_target'][-2:]=='GG':
303
256
  list_best_target.append(a_row[1]['fw_target'])
304
257
  list_best_location.append(a_row[1]['fw_location'])
258
+ list_cleavage_site.append(int(a_row[1]['fw_location'].split('-')[1]) - 6)
305
259
  list_delete.append(a_row[1]['fw_deletion'])
306
260
  list_insert.append(a_row[1]['fw_insertion'])
307
261
  list_mismat.append(a_row[1]['fw_mismatch'])
@@ -310,6 +264,7 @@ def main():
310
264
  elif a_row[1]['rv_target'][-2:]=='GG':
311
265
  list_best_target.append(a_row[1]['rv_target'])
312
266
  list_best_location.append(a_row[1]['rv_location'])
267
+ list_cleavage_site.append(int(a_row[1]['rv_location'].split('-')[0].split(':')[1]) + 5)
313
268
  list_delete.append(a_row[1]['rv_deletion'])
314
269
  list_insert.append(a_row[1]['rv_insertion'])
315
270
  list_mismat.append(a_row[1]['rv_mismatch'])
@@ -317,25 +272,28 @@ def main():
317
272
  else:
318
273
  list_best_target.append(a_row[1]['fw_target'])
319
274
  list_best_location.append(a_row[1]['fw_location'])
275
+ list_cleavage_site.append(int(a_row[1]['fw_location'].split('-')[1]) - 6)
320
276
  list_delete.append(a_row[1]['fw_deletion'])
321
277
  list_insert.append(a_row[1]['fw_insertion'])
322
278
  list_mismat.append(a_row[1]['fw_mismatch'])
323
279
  list_GG.append('NO_same_score')
324
- # 记入 df_alignment
325
- df_alignment['deletion'] = list_delete
326
- df_alignment['insertion'] = list_insert
327
- df_alignment['mismatch'] = list_mismat
328
- df_alignment['GG'] = list_GG
329
- df_alignment['best_target'] = list_best_target
330
- df_alignment['target_location'] = list_best_location
280
+ # 记入 df_candidate
281
+ df_candidate['deletion'] = list_delete
282
+ df_candidate['insertion'] = list_insert
283
+ df_candidate['mismatch'] = list_mismat
284
+ df_candidate['GG'] = list_GG
285
+ df_candidate['best_target'] = list_best_target
286
+ df_candidate['target_location'] = list_best_location
287
+ df_candidate['cleavage_site'] = list_cleavage_site
331
288
 
332
- # df_pivot 一致,左右各 location_len
333
- bed_short['st'] = bed_short['midpoint'] - location_len
334
- bed_short['ed'] = bed_short['midpoint'] + location_len
335
- bed_short.loc[bed_short['st']<0,'st']=0
336
- df_alignment.index = igvfmt(bed_short)
337
- df_alignment.to_csv(dir_df_alignment)
338
- print(f'Output df_alignment_{sgRNA_name}_{location_len}.csv')
289
+ # 2.0 更新一下格式
290
+ df_candidate = df_candidate.drop_duplicates(subset=['target_location']).reset_index(drop=True)
291
+ df_candidate = pd.concat([xseq.bedfmt(df_candidate['target_location']), df_candidate],axis=1)
292
+ # df_candidate['midpoint'] = ((df_candidate['ed'] + df_candidate['st'])/2).astype(int)
293
+ df_candidate = xseq.add_ID(df_candidate, midpoint='cleavage_site')
294
+
295
+ df_candidate.to_csv(dir_df_candidate)
296
+ print(f'Output df_candidate_{sgRNA_name}.csv')
339
297
  os.remove(temp_bed)
340
298
 
341
299
  return 'Done!'
@@ -1,6 +1,8 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
+ # 2023.08.11. v1.1 adding a option for not normalizing the bw file
5
+
4
6
  import argparse
5
7
  import os, glob, yaml
6
8
  import pandas as pd
@@ -13,15 +15,16 @@ os.chmod( os.path.join(script_folder, 'bedGraphToBigWig'), 0o755)
13
15
  ###
14
16
  parser = argparse.ArgumentParser()
15
17
  parser.description='Mapping fastq files of Track-seq.'
16
- parser.add_argument('-f','--folder', type=str, required=True, help='Directory of the input folder' )
17
- parser.add_argument('-r','--ref' , type=str, required=True, help='The fasta file of reference genome')
18
- parser.add_argument('-i','--index' , type=str, required=True, help='The index file of chromap')
19
- parser.add_argument('-g','--genome', type=str, required=True, help='File of chromosome sizes, or "hg38", "mm10" ')
18
+ parser.add_argument('-f','--folder', type=str, required=True, help='Directory of the input folder' )
19
+ parser.add_argument('-r','--ref' , type=str, required=True, help='The fasta file of reference genome')
20
+ parser.add_argument('-i','--index' , type=str, required=True, help='The index file of chromap')
21
+ parser.add_argument('-g','--genome', type=str, required=True, help='File of chromosome sizes, or "hg38", "mm10" ')
20
22
  parser.add_argument('-o','--outdir', type=str, default='same', help='The output folder')
21
- parser.add_argument('--subfolder' , type=int, default=0, help='subfolder level')
22
- parser.add_argument('-t','--thread', type=int, default=4, help='Number of threads to be used')
23
- parser.add_argument('--blacklist' , type=str, default='same', help='Blacklist of genome regions in bed format.')
24
- parser.add_argument('--binsize' , type=str, default=10, help='Bin size for calculating bw ratio')
23
+ parser.add_argument('--subfolder' , type=int, default=0, help='subfolder level')
24
+ parser.add_argument('-t','--thread', type=int, default=4, help='Number of threads to be used')
25
+ parser.add_argument('--blacklist' , type=str, default='same', help='Blacklist of genome regions in bed format. "none" for no filter')
26
+ parser.add_argument('--binsize' , type=str, default=100, help='Bin size for calculating bw residue')
27
+ parser.add_argument('--normalize' , type=str, default='True', help='Whether to normalize the BigWig file. "True" or "False"')
25
28
 
26
29
  args = parser.parse_args()
27
30
 
@@ -31,6 +34,8 @@ if (args.genome == 'hg38') or (args.genome == 'mm10'):
31
34
  else:
32
35
  dir_chrom_sizes = args.genome
33
36
 
37
+ if (args.normalize != 'True') & (args.normalize != 'False'):
38
+ raise ValueError('Please provide "True" or "False" for "--normalize"')
34
39
 
35
40
  if args.blacklist == 'same':
36
41
  assert ((args.genome == 'hg38') or (args.genome == 'mm10')), 'Please provide blacklist file, or "--blacklist none" to skip'
@@ -66,7 +71,6 @@ for a_type in ['_trimmed_2', '_2_val_2','_R2_val_2','_R2','_2']:
66
71
  sample_dir = prefix.str[:-len_type]
67
72
  break
68
73
 
69
-
70
74
  if nametype is None:
71
75
  # pattern 搜索模式,可能会出 bug
72
76
  # find "_R2." or "_2." in prefix[0]
@@ -93,13 +97,14 @@ dict_yaml = {
93
97
  'blacklist':blacklist,
94
98
  'nametype':nametype,
95
99
  'genomelen':dir_chrom_sizes,
100
+ 'normalize':args.normalize,
96
101
  'script_folder':script_folder
97
102
  }
98
103
 
99
104
  with open( os.path.join(args.outdir,'config.yaml'), 'w') as outfile:
100
105
  yaml.dump(dict_yaml, outfile, default_flow_style=False)
101
106
 
102
- snakefile = os.path.join(script_dir, 'mapping/Snakefile_Trackseq')
107
+ snakefile = os.path.join(script_dir, 'mapping/Snakefile_offtracker')
103
108
  shutil.copy(snakefile, os.path.join(args.outdir,'Snakefile'))
104
109
 
105
110