offtracker 1.0.1__zip → 2.7.7__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {offtracker-1.0.1/offtracker.egg-info → offtracker-2.7.7}/PKG-INFO +13 -6
- {offtracker-1.0.1 → offtracker-2.7.7}/README.md +12 -5
- offtracker-2.7.7/offtracker/X_offplot.py +123 -0
- offtracker-2.7.7/offtracker/X_offtracker.py +338 -0
- offtracker-1.0.1/offtracker/X_general.py → offtracker-2.7.7/offtracker/X_sequence.py +18 -5
- {offtracker-1.0.1 → offtracker-2.7.7}/offtracker/__init__.py +1 -1
- offtracker-2.7.7/offtracker/_version.py +27 -0
- offtracker-2.7.7/offtracker/mapping/Snakefile_offtracker +245 -0
- offtracker-2.7.7/offtracker/mapping/offtracker_blacklist_hg38.merged.bed +3846 -0
- offtracker-2.7.7/offtracker/mapping/offtracker_blacklist_mm10.merged.bed +5827 -0
- {offtracker-1.0.1 → offtracker-2.7.7/offtracker.egg-info}/PKG-INFO +13 -6
- {offtracker-1.0.1 → offtracker-2.7.7}/offtracker.egg-info/SOURCES.txt +4 -3
- offtracker-2.7.7/scripts/offtracker_analysis.py +369 -0
- {offtracker-1.0.1 → offtracker-2.7.7}/scripts/offtracker_candidates.py +59 -101
- {offtracker-1.0.1 → offtracker-2.7.7}/scripts/offtracker_config.py +15 -10
- offtracker-1.0.1/offtracker/X_analysis.py +0 -332
- offtracker-1.0.1/offtracker/_version.py +0 -1
- offtracker-1.0.1/offtracker/mapping/Snakefile_Trackseq +0 -193
- offtracker-1.0.1/offtracker/mapping/offtracker_blacklist_hg38.merged.bed +0 -22228
- offtracker-1.0.1/offtracker/mapping/offtracker_blacklist_mm10.merged.bed +0 -9347
- offtracker-1.0.1/scripts/offtracker_analysis.py +0 -407
- {offtracker-1.0.1 → offtracker-2.7.7}/LICENSE.txt +0 -0
- {offtracker-1.0.1 → offtracker-2.7.7}/MANIFEST.in +0 -0
- {offtracker-1.0.1 → offtracker-2.7.7}/offtracker/mapping/1.1_bed2fr_v4.5.py +0 -0
- {offtracker-1.0.1 → offtracker-2.7.7}/offtracker/mapping/1.3_bdg_normalize_v4.0.py +0 -0
- {offtracker-1.0.1 → offtracker-2.7.7}/offtracker/mapping/bedGraphToBigWig +0 -0
- {offtracker-1.0.1 → offtracker-2.7.7}/offtracker/mapping/hg38.chrom.sizes +0 -0
- {offtracker-1.0.1 → offtracker-2.7.7}/offtracker/mapping/mm10.chrom.sizes +0 -0
- {offtracker-1.0.1 → offtracker-2.7.7}/offtracker.egg-info/dependency_links.txt +0 -0
- {offtracker-1.0.1 → offtracker-2.7.7}/offtracker.egg-info/requires.txt +0 -0
- {offtracker-1.0.1 → offtracker-2.7.7}/offtracker.egg-info/top_level.txt +0 -0
- {offtracker-1.0.1 → offtracker-2.7.7}/setup.cfg +0 -0
- {offtracker-1.0.1 → offtracker-2.7.7}/setup.py +0 -0
@@ -1,14 +1,17 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8 -*-
|
3
3
|
|
4
|
+
# 2023.10.27. v2.0: 2.0以target_location midpoint为中心,因此取消 pct 计算
|
5
|
+
# 2023.12.06. v2.1: 2.1增加 cleavage_site 推测, 修正 deletion 错位, 以 cleavage_site 为中心
|
4
6
|
import os,sys,re,time
|
7
|
+
from itertools import product
|
5
8
|
|
6
9
|
if sys.version_info < (3,0):
|
7
10
|
import platform
|
8
11
|
raise Exception(f'python3 is needed, while running {platform.python_version()} now')
|
9
12
|
|
10
13
|
import offtracker
|
11
|
-
|
14
|
+
import offtracker.X_sequence as xseq
|
12
15
|
script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
|
13
16
|
script_folder= os.path.join(script_dir, 'mapping')
|
14
17
|
|
@@ -28,10 +31,9 @@ def main():
|
|
28
31
|
parser.add_argument('-b','--blastdb', type=str, required=True, help='blast database')
|
29
32
|
parser.add_argument('-o','--outdir' , type=str, required=True, help='The output folder')
|
30
33
|
parser.add_argument('-g','--genome' , type=str, default='hg38', help='File of chromosome sizes, or "hg38", "mm10" ')
|
31
|
-
parser.add_argument('-t','--thread' , type=int, default=4, help='Number of threads
|
34
|
+
parser.add_argument('-t','--thread' , type=int, default=4, help='Number of threads for parallel computing')
|
32
35
|
parser.add_argument('--quick_mode' , action='store_true', help='BLAST faster but less candidates.')
|
33
|
-
|
34
|
-
|
36
|
+
|
35
37
|
args = parser.parse_args()
|
36
38
|
|
37
39
|
|
@@ -50,19 +52,12 @@ def main():
|
|
50
52
|
dir_ref_fa = args.ref
|
51
53
|
blast_db = args.blastdb
|
52
54
|
quick_mode = args.quick_mode
|
53
|
-
if args.regions == 'auto':
|
54
|
-
regions = [500, 1000, 2000, 3000]
|
55
|
-
else:
|
56
|
-
regions = list(map(int, args.regions))
|
57
|
-
common_chr = pd.Series(['chr']*23).str[:] + pd.Series(range(23)).astype(str).str[:]
|
58
|
-
common_chr = pd.concat([common_chr, pd.Series(['chrX','chrY'])]).to_numpy()
|
59
55
|
|
60
56
|
# parameters for alignment
|
61
57
|
half_width = 100
|
62
58
|
pct_params = 1.0
|
63
59
|
frag_len= half_width*2
|
64
|
-
|
65
|
-
dir_df_alignment = os.path.join(dir_output, f'df_alignment_{sgRNA_name}_{location_len}.csv')
|
60
|
+
dir_df_candidate = os.path.join(dir_output, f'df_candidate_{sgRNA_name}.csv')
|
66
61
|
|
67
62
|
|
68
63
|
sgRNA_seq = sgRNA_seq.upper()
|
@@ -72,13 +67,13 @@ def main():
|
|
72
67
|
dir_sgRNA_bed = os.path.join(dir_output, f'{sgRNA_name}_PAM.bed')
|
73
68
|
|
74
69
|
|
75
|
-
possible_sgRNA_PAM = list(product([sgRNA_seq],possible_seq(PAM)))
|
70
|
+
possible_sgRNA_PAM = list(product([sgRNA_seq],xseq.possible_seq(PAM)))
|
76
71
|
possible_sgRNA_PAM = [''.join(combination) for combination in possible_sgRNA_PAM]
|
77
72
|
n_seq = len(possible_sgRNA_PAM)
|
78
73
|
|
79
74
|
ID = pd.Series(['seq']*n_seq) + pd.Series(range(1,n_seq+1)).astype(str)
|
80
75
|
df_sgRNA_PAM = pd.DataFrame({'ID':ID,'sequence':possible_sgRNA_PAM})
|
81
|
-
write_fasta(df_sgRNA_PAM, dir_sgRNA_fasta)
|
76
|
+
xseq.write_fasta(df_sgRNA_PAM, dir_sgRNA_fasta)
|
82
77
|
|
83
78
|
|
84
79
|
|
@@ -95,7 +90,7 @@ def main():
|
|
95
90
|
gapopen=4, gapextend=2, reward=2, word_size=5, dust='no', soft_masking=False)
|
96
91
|
else:
|
97
92
|
blastx_cline = NcbiblastnCommandline(query=dir_sgRNA_fasta, task='blastn-short',out=dir_sgRNA_blast,
|
98
|
-
db=blast_db, evalue=
|
93
|
+
db=blast_db, evalue=10000,outfmt=6, num_threads=n_threads,
|
99
94
|
gapopen=4, gapextend=2, reward=2, word_size=4, dust='no', soft_masking=False)
|
100
95
|
print(f'BLAST for candidate off-target sites of {sgRNA_name}.')
|
101
96
|
blastx_cline()
|
@@ -129,77 +124,28 @@ def main():
|
|
129
124
|
blast_regions = blast_regions.reindex(columns = ['chr', 'st', 'ed' , 'query acc.', '% identity', 'alignment length', 'mismatches',
|
130
125
|
'gap opens', 'q. start', 'q. end', 'evalue', 'bit score', 'reverse', 'location'] )
|
131
126
|
|
132
|
-
# 输出 bed 用于后续
|
127
|
+
# 输出 bed 用于后续 alignment score 计算
|
133
128
|
blast_regions_bed = blast_regions[['chr','st','ed']]
|
134
|
-
|
129
|
+
xseq.write_bed(blast_regions_bed, dir_sgRNA_bed)
|
135
130
|
# 对 bed 进行排序但不合并
|
136
131
|
a = pybedtools.BedTool(dir_sgRNA_bed)
|
137
132
|
a.sort(g=dir_chrom_sizes).saveas( dir_sgRNA_bed )
|
138
133
|
print(f'Output {sgRNA_name}_PAM.bed')
|
139
134
|
|
140
135
|
|
141
|
-
############################
|
142
|
-
# Output candidate regions #
|
143
|
-
############################
|
144
|
-
|
145
|
-
blast_regions_bed = X_readbed(dir_sgRNA_bed)
|
146
|
-
blast_regions_bed = blast_regions_bed[blast_regions_bed['chr'].isin(common_chr)]
|
147
|
-
blast_regions_bed['midpoint'] = ((blast_regions_bed['st'] + blast_regions_bed['ed'])/2).astype(int)
|
148
|
-
blast_regions_bed = blast_regions_bed.drop_duplicates(subset=['chr','midpoint']).copy()
|
149
|
-
for a_region in regions:
|
150
|
-
candidate_region_left = blast_regions_bed.copy()
|
151
|
-
candidate_region_left['ed'] = candidate_region_left['midpoint']
|
152
|
-
candidate_region_left['st'] = candidate_region_left['midpoint']-a_region
|
153
|
-
candidate_region_left.loc[candidate_region_left['st']<0,'st'] = 0
|
154
|
-
# 储存并排序
|
155
|
-
left_region =os.path.join(dir_output, f'{sgRNA_name}_candidate_left_{a_region}.bed')
|
156
|
-
writebed(candidate_region_left.iloc[:,:3], left_region)
|
157
|
-
a = pybedtools.BedTool(left_region)
|
158
|
-
a.sort(g=dir_chrom_sizes).saveas( left_region )
|
159
|
-
|
160
|
-
candidate_region_right = blast_regions_bed.copy()
|
161
|
-
candidate_region_right['st'] = candidate_region_right['midpoint']
|
162
|
-
candidate_region_right['ed'] = candidate_region_right['midpoint']+a_region
|
163
|
-
# 储存并排序
|
164
|
-
right_region = os.path.join(dir_output, f'{sgRNA_name}_candidate_right_{a_region}.bed')
|
165
|
-
writebed(candidate_region_right.iloc[:,:3], right_region)
|
166
|
-
a = pybedtools.BedTool(right_region)
|
167
|
-
a.sort(g=dir_chrom_sizes).saveas( right_region )
|
168
|
-
|
169
|
-
# background noise
|
170
|
-
for i in range(1,4):
|
171
|
-
candidate_region_left = blast_regions_bed.copy()
|
172
|
-
candidate_region_left['ed'] = candidate_region_left['midpoint']-5000*i
|
173
|
-
candidate_region_left['st'] = candidate_region_left['midpoint']-5000*(i+1)
|
174
|
-
candidate_region_left.loc[candidate_region_left['st']<0,'st'] = 0
|
175
|
-
candidate_region_left.loc[candidate_region_left['ed']<5000,'ed'] = 5000
|
176
|
-
# 储存并排序
|
177
|
-
left_region =os.path.join(dir_output, f'{sgRNA_name}_candidate_left_bkg{i}.bed')
|
178
|
-
writebed(candidate_region_left.iloc[:,:3], left_region)
|
179
|
-
a = pybedtools.BedTool(left_region)
|
180
|
-
a.sort(g=dir_chrom_sizes).saveas( left_region )
|
181
|
-
|
182
|
-
candidate_region_right = blast_regions_bed.copy()
|
183
|
-
candidate_region_right['st'] = candidate_region_right['midpoint']+5000*i
|
184
|
-
candidate_region_right['ed'] = candidate_region_right['midpoint']+5000*(i+1)
|
185
|
-
# 储存并排序
|
186
|
-
right_region = os.path.join(dir_output, f'{sgRNA_name}_candidate_right_bkg{i}.bed')
|
187
|
-
writebed(candidate_region_right.iloc[:,:3], right_region)
|
188
|
-
a = pybedtools.BedTool(right_region)
|
189
|
-
a.sort(g=dir_chrom_sizes).saveas( right_region )
|
190
|
-
|
191
|
-
print(f'Output candidate regions of {sgRNA_name}.')
|
192
|
-
|
193
136
|
###################
|
194
137
|
# alignment score #
|
195
138
|
###################
|
196
|
-
if os.path.isfile(
|
197
|
-
print(f'{
|
139
|
+
if os.path.isfile(dir_df_candidate):
|
140
|
+
print(f'{dir_df_candidate} exists, skipped.')
|
198
141
|
else:
|
199
142
|
#########
|
200
143
|
# 读取 blast bed
|
201
144
|
#########
|
202
|
-
|
145
|
+
common_chr = pd.Series(['chr']*23).str[:] + pd.Series(range(23)).astype(str).str[:]
|
146
|
+
common_chr = pd.concat([common_chr, pd.Series(['chrX','chrY'])]).to_numpy()
|
147
|
+
|
148
|
+
bed_short = xseq.X_readbed(dir_sgRNA_bed)
|
203
149
|
bed_short = bed_short[bed_short['chr'].isin(common_chr)].copy()
|
204
150
|
bed_short['midpoint'] = ((bed_short['st'] + bed_short['ed'])/2).astype(int)
|
205
151
|
bed_short['st'] = bed_short['midpoint'] - half_width
|
@@ -212,7 +158,7 @@ def main():
|
|
212
158
|
#########
|
213
159
|
|
214
160
|
temp_bed = os.path.join(dir_output, 'temp.bed')
|
215
|
-
|
161
|
+
xseq.write_bed(bed_short.iloc[:,:3], temp_bed)
|
216
162
|
a = pybedtools.BedTool(temp_bed)
|
217
163
|
fasta = pybedtools.example_filename(dir_ref_fa)
|
218
164
|
a = a.sequence(fi=fasta)
|
@@ -239,7 +185,7 @@ def main():
|
|
239
185
|
mismatch_score = 0.01
|
240
186
|
# 添加 PAM
|
241
187
|
sgRNA_PAM_fw = sgRNA_seq + PAM
|
242
|
-
sgRNA_PAM_rv = reverse_complement(sgRNA_PAM_fw)
|
188
|
+
sgRNA_PAM_rv = xseq.reverse_complement(sgRNA_PAM_fw)
|
243
189
|
|
244
190
|
list_args_fw=[]
|
245
191
|
list_args_rv=[]
|
@@ -249,38 +195,44 @@ def main():
|
|
249
195
|
list_args_rv.append( [a_key, sgRNA_PAM_rv, seq, frag_len, DNA_matrix, mismatch_score] )
|
250
196
|
st = time.time()
|
251
197
|
with mp.Pool(n_threads) as p:
|
252
|
-
list_align_forward = p.starmap(sgRNA_alignment, list_args_fw)
|
198
|
+
list_align_forward = p.starmap(xseq.sgRNA_alignment, list_args_fw)
|
253
199
|
ed = time.time()
|
254
200
|
print('align_forward:{:.2f}'.format(ed-st))
|
255
201
|
st = time.time()
|
256
202
|
with mp.Pool(n_threads) as p:
|
257
|
-
list_align_reverse = p.starmap(sgRNA_alignment, list_args_rv)
|
203
|
+
list_align_reverse = p.starmap(xseq.sgRNA_alignment, list_args_rv)
|
258
204
|
ed = time.time()
|
259
205
|
print('align_reverse:{:.2f}'.format(ed-st))
|
260
206
|
#
|
261
207
|
df_align_forward = pd.DataFrame(list_align_forward, columns= ['fw_score','fw_pct','fw_target','fw_location','fw_deletion','fw_insertion','fw_mismatch'])
|
262
208
|
df_align_reverse = pd.DataFrame(list_align_reverse, columns= ['rv_score','rv_pct','rv_target','rv_location','rv_deletion','rv_insertion','rv_mismatch'])
|
263
|
-
df_align_reverse['rv_target'] = df_align_reverse['rv_target'].apply(reverse_complement)
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
209
|
+
df_align_reverse['rv_target'] = df_align_reverse['rv_target'].apply(xseq.reverse_complement)
|
210
|
+
df_candidate = pd.concat([df_align_forward,df_align_reverse],axis=1)
|
211
|
+
df_candidate['location'] = fasta.keys()
|
212
|
+
df_candidate['alignment_score'] = df_candidate[['fw_score','rv_score']].max(axis=1)
|
213
|
+
#df_candidate['fw_score_2'] = df_candidate['fw_score']*(pct_params-df_candidate['fw_pct'].abs())
|
214
|
+
#df_candidate['rv_score_2'] = df_candidate['rv_score']*(pct_params-df_candidate['rv_pct'].abs())
|
215
|
+
#df_candidate['best_seq_score'] = df_candidate[['fw_score_2', 'rv_score_2']].max(axis=1)
|
216
|
+
#df_candidate['best_strand'] = df_candidate[['fw_score_2', 'rv_score_2']].idxmax(axis='columns').replace({'fw_score_2':'+', 'rv_score_2':'-'})
|
217
|
+
#df_candidate.loc[df_candidate['fw_score_2']==df_candidate['rv_score_2'],'best_strand']='equal_score'
|
218
|
+
df_candidate['best_seq_score'] = df_candidate[['fw_score', 'rv_score']].max(axis=1)
|
219
|
+
df_candidate['best_strand'] = df_candidate[['fw_score', 'rv_score']].idxmax(axis='columns').replace({'fw_score':'+', 'rv_score':'-'})
|
220
|
+
df_candidate.loc[df_candidate['fw_score']==df_candidate['rv_score'],'best_strand']='equal_score'
|
221
|
+
|
273
222
|
# GG check
|
223
|
+
# 2023.12.05 增加 cleavage_site 推测
|
274
224
|
list_best_target = []
|
275
225
|
list_best_location = []
|
226
|
+
list_cleavage_site = []
|
276
227
|
list_delete = []
|
277
228
|
list_insert = []
|
278
229
|
list_mismat = []
|
279
230
|
list_GG = []
|
280
|
-
for a_row in
|
231
|
+
for a_row in df_candidate.iterrows():
|
281
232
|
if a_row[1]['best_strand']=='+':
|
282
233
|
list_best_target.append(a_row[1]['fw_target'])
|
283
234
|
list_best_location.append(a_row[1]['fw_location'])
|
235
|
+
list_cleavage_site.append(int(a_row[1]['fw_location'].split('-')[1]) - 6)
|
284
236
|
list_delete.append(a_row[1]['fw_deletion'])
|
285
237
|
list_insert.append(a_row[1]['fw_insertion'])
|
286
238
|
list_mismat.append(a_row[1]['fw_mismatch'])
|
@@ -291,6 +243,7 @@ def main():
|
|
291
243
|
elif a_row[1]['best_strand']=='-':
|
292
244
|
list_best_target.append(a_row[1]['rv_target'])
|
293
245
|
list_best_location.append(a_row[1]['rv_location'])
|
246
|
+
list_cleavage_site.append(int(a_row[1]['rv_location'].split('-')[0].split(':')[1]) + 5)
|
294
247
|
list_delete.append(a_row[1]['rv_deletion'])
|
295
248
|
list_insert.append(a_row[1]['rv_insertion'])
|
296
249
|
list_mismat.append(a_row[1]['rv_mismatch'])
|
@@ -302,6 +255,7 @@ def main():
|
|
302
255
|
if a_row[1]['fw_target'][-2:]=='GG':
|
303
256
|
list_best_target.append(a_row[1]['fw_target'])
|
304
257
|
list_best_location.append(a_row[1]['fw_location'])
|
258
|
+
list_cleavage_site.append(int(a_row[1]['fw_location'].split('-')[1]) - 6)
|
305
259
|
list_delete.append(a_row[1]['fw_deletion'])
|
306
260
|
list_insert.append(a_row[1]['fw_insertion'])
|
307
261
|
list_mismat.append(a_row[1]['fw_mismatch'])
|
@@ -310,6 +264,7 @@ def main():
|
|
310
264
|
elif a_row[1]['rv_target'][-2:]=='GG':
|
311
265
|
list_best_target.append(a_row[1]['rv_target'])
|
312
266
|
list_best_location.append(a_row[1]['rv_location'])
|
267
|
+
list_cleavage_site.append(int(a_row[1]['rv_location'].split('-')[0].split(':')[1]) + 5)
|
313
268
|
list_delete.append(a_row[1]['rv_deletion'])
|
314
269
|
list_insert.append(a_row[1]['rv_insertion'])
|
315
270
|
list_mismat.append(a_row[1]['rv_mismatch'])
|
@@ -317,25 +272,28 @@ def main():
|
|
317
272
|
else:
|
318
273
|
list_best_target.append(a_row[1]['fw_target'])
|
319
274
|
list_best_location.append(a_row[1]['fw_location'])
|
275
|
+
list_cleavage_site.append(int(a_row[1]['fw_location'].split('-')[1]) - 6)
|
320
276
|
list_delete.append(a_row[1]['fw_deletion'])
|
321
277
|
list_insert.append(a_row[1]['fw_insertion'])
|
322
278
|
list_mismat.append(a_row[1]['fw_mismatch'])
|
323
279
|
list_GG.append('NO_same_score')
|
324
|
-
# 记入
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
280
|
+
# 记入 df_candidate
|
281
|
+
df_candidate['deletion'] = list_delete
|
282
|
+
df_candidate['insertion'] = list_insert
|
283
|
+
df_candidate['mismatch'] = list_mismat
|
284
|
+
df_candidate['GG'] = list_GG
|
285
|
+
df_candidate['best_target'] = list_best_target
|
286
|
+
df_candidate['target_location'] = list_best_location
|
287
|
+
df_candidate['cleavage_site'] = list_cleavage_site
|
331
288
|
|
332
|
-
#
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
289
|
+
# 2.0 更新一下格式
|
290
|
+
df_candidate = df_candidate.drop_duplicates(subset=['target_location']).reset_index(drop=True)
|
291
|
+
df_candidate = pd.concat([xseq.bedfmt(df_candidate['target_location']), df_candidate],axis=1)
|
292
|
+
# df_candidate['midpoint'] = ((df_candidate['ed'] + df_candidate['st'])/2).astype(int)
|
293
|
+
df_candidate = xseq.add_ID(df_candidate, midpoint='cleavage_site')
|
294
|
+
|
295
|
+
df_candidate.to_csv(dir_df_candidate)
|
296
|
+
print(f'Output df_candidate_{sgRNA_name}.csv')
|
339
297
|
os.remove(temp_bed)
|
340
298
|
|
341
299
|
return 'Done!'
|
@@ -1,6 +1,8 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8 -*-
|
3
3
|
|
4
|
+
# 2023.08.11. v1.1 adding an option for not normalizing the bw file
|
5
|
+
|
4
6
|
import argparse
|
5
7
|
import os, glob, yaml
|
6
8
|
import pandas as pd
|
@@ -13,15 +15,16 @@ os.chmod( os.path.join(script_folder, 'bedGraphToBigWig'), 0o755)
|
|
13
15
|
###
|
14
16
|
parser = argparse.ArgumentParser()
|
15
17
|
parser.description='Mapping fastq files of Track-seq.'
|
16
|
-
parser.add_argument('-f','--folder', type=str, required=True,
|
17
|
-
parser.add_argument('-r','--ref' , type=str, required=True,
|
18
|
-
parser.add_argument('-i','--index' , type=str, required=True,
|
19
|
-
parser.add_argument('-g','--genome', type=str, required=True,
|
18
|
+
parser.add_argument('-f','--folder', type=str, required=True, help='Directory of the input folder' )
|
19
|
+
parser.add_argument('-r','--ref' , type=str, required=True, help='The fasta file of reference genome')
|
20
|
+
parser.add_argument('-i','--index' , type=str, required=True, help='The index file of chromap')
|
21
|
+
parser.add_argument('-g','--genome', type=str, required=True, help='File of chromosome sizes, or "hg38", "mm10" ')
|
20
22
|
parser.add_argument('-o','--outdir', type=str, default='same', help='The output folder')
|
21
|
-
parser.add_argument('--subfolder' , type=int, default=0,
|
22
|
-
parser.add_argument('-t','--thread', type=int, default=4,
|
23
|
-
parser.add_argument('--blacklist' , type=str, default='same', help='Blacklist of genome regions in bed format.')
|
24
|
-
parser.add_argument('--binsize' , type=str, default=
|
23
|
+
parser.add_argument('--subfolder' , type=int, default=0, help='subfolder level')
|
24
|
+
parser.add_argument('-t','--thread', type=int, default=4, help='Number of threads to be used')
|
25
|
+
parser.add_argument('--blacklist' , type=str, default='same', help='Blacklist of genome regions in bed format. "none" for no filter')
|
26
|
+
parser.add_argument('--binsize' , type=str, default=100, help='Bin size for calculating bw residue')
|
27
|
+
parser.add_argument('--normalize' , type=str, default='True', help='Whether to normalize the BigWig file. "True" or "False"')
|
25
28
|
|
26
29
|
args = parser.parse_args()
|
27
30
|
|
@@ -31,6 +34,8 @@ if (args.genome == 'hg38') or (args.genome == 'mm10'):
|
|
31
34
|
else:
|
32
35
|
dir_chrom_sizes = args.genome
|
33
36
|
|
37
|
+
if (args.normalize != 'True') & (args.normalize != 'False'):
|
38
|
+
raise ValueError('Please provide "True" or "False" for "--normalize"')
|
34
39
|
|
35
40
|
if args.blacklist == 'same':
|
36
41
|
assert ((args.genome == 'hg38') or (args.genome == 'mm10')), 'Please provide blacklist file, or "--blacklist none" to skip'
|
@@ -66,7 +71,6 @@ for a_type in ['_trimmed_2', '_2_val_2','_R2_val_2','_R2','_2']:
|
|
66
71
|
sample_dir = prefix.str[:-len_type]
|
67
72
|
break
|
68
73
|
|
69
|
-
|
70
74
|
if nametype is None:
|
71
75
|
# pattern 搜索模式,可能会出 bug
|
72
76
|
# find "_R2." or "_2." in prefix[0]
|
@@ -93,13 +97,14 @@ dict_yaml = {
|
|
93
97
|
'blacklist':blacklist,
|
94
98
|
'nametype':nametype,
|
95
99
|
'genomelen':dir_chrom_sizes,
|
100
|
+
'normalize':args.normalize,
|
96
101
|
'script_folder':script_folder
|
97
102
|
}
|
98
103
|
|
99
104
|
with open( os.path.join(args.outdir,'config.yaml'), 'w') as outfile:
|
100
105
|
yaml.dump(dict_yaml, outfile, default_flow_style=False)
|
101
106
|
|
102
|
-
snakefile = os.path.join(script_dir, 'mapping/
|
107
|
+
snakefile = os.path.join(script_dir, 'mapping/Snakefile_offtracker')
|
103
108
|
shutil.copy(snakefile, os.path.join(args.outdir,'Snakefile'))
|
104
109
|
|
105
110
|
|