offtracker 2.7.10__zip → 2.10.0__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {offtracker-2.7.10/offtracker.egg-info → offtracker-2.10.0}/PKG-INFO +62 -18
- {offtracker-2.7.10 → offtracker-2.10.0}/README.md +62 -18
- {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/X_offplot.py +13 -2
- {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/X_sequence.py +113 -7
- {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/_version.py +8 -2
- offtracker-2.10.0/offtracker/snakefile/Snakefile_QC.smk +66 -0
- offtracker-2.10.0/offtracker/snakefile/Snakefile_offtracker.smk +249 -0
- offtracker-2.7.10/offtracker/mapping/1.1_bed2fr_v4.5.py → offtracker-2.10.0/offtracker/utility/1.1_bed2fr.py +6 -4
- {offtracker-2.7.10 → offtracker-2.10.0/offtracker.egg-info}/PKG-INFO +62 -18
- offtracker-2.10.0/offtracker.egg-info/SOURCES.txt +28 -0
- {offtracker-2.7.10 → offtracker-2.10.0}/scripts/offtracker_analysis.py +10 -3
- offtracker-2.10.0/scripts/offtracker_candidates.py +318 -0
- {offtracker-2.7.10 → offtracker-2.10.0}/scripts/offtracker_config.py +28 -44
- offtracker-2.10.0/scripts/offtracker_qc.py +62 -0
- {offtracker-2.7.10 → offtracker-2.10.0}/setup.py +5 -4
- offtracker-2.7.10/offtracker/mapping/Snakefile_offtracker +0 -245
- offtracker-2.7.10/offtracker.egg-info/SOURCES.txt +0 -26
- offtracker-2.7.10/scripts/offtracker_candidates.py +0 -307
- {offtracker-2.7.10 → offtracker-2.10.0}/LICENSE.txt +0 -0
- {offtracker-2.7.10 → offtracker-2.10.0}/MANIFEST.in +0 -0
- {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/X_offtracker.py +0 -0
- {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/__init__.py +0 -0
- {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/1.3_bdg_normalize_v4.0.py +0 -0
- {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/bedGraphToBigWig +0 -0
- {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/hg38.chrom.sizes +0 -0
- {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/mm10.chrom.sizes +0 -0
- {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/offtracker_blacklist_hg38.merged.bed +0 -0
- {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/offtracker_blacklist_mm10.merged.bed +0 -0
- {offtracker-2.7.10 → offtracker-2.10.0}/offtracker.egg-info/dependency_links.txt +0 -0
- {offtracker-2.7.10 → offtracker-2.10.0}/offtracker.egg-info/requires.txt +0 -0
- {offtracker-2.7.10 → offtracker-2.10.0}/offtracker.egg-info/top_level.txt +0 -0
- {offtracker-2.7.10 → offtracker-2.10.0}/scripts/offtracker_plot.py +0 -0
- {offtracker-2.7.10 → offtracker-2.10.0}/setup.cfg +0 -0
@@ -0,0 +1,249 @@
|
|
1
|
+
# 2023.08.11. add an option for not normalizing the bw file
|
2
|
+
# 2024.01.23. add --fixedStep to bigwigCompare for not merging neighbouring bins with equal values.
|
3
|
+
# 2025.05.22. refine the structure
|
4
|
+
|
5
|
+
configfile: "config.yaml"
|
6
|
+
|
7
|
+
# # fastq information
|
8
|
+
_files_R1 = config['files_R1'] # dict, keyed by sample
|
9
|
+
_files_R2 = config['files_R2'] # dict, keyed by sample
|
10
|
+
# # run parameters
|
11
|
+
_output_dir = config["output_dir"]
|
12
|
+
_threads = config['thread'] # thread count; every rule's "threads:" directive reads it under the name _threads
|
13
|
+
_BinSize = str(config["binsize"])
|
14
|
+
_normalize = config["normalize"]
|
15
|
+
|
16
|
+
|
17
|
+
import os
|
18
|
+
|
19
|
+
if _normalize == "True":
|
20
|
+
rule all:
|
21
|
+
input:
|
22
|
+
expand( os.path.join(_output_dir,"{sample}.fw.bed"), sample=_files_R1 ),
|
23
|
+
expand( os.path.join(_output_dir,"{sample}.rv.bed"), sample=_files_R1 ),
|
24
|
+
expand( os.path.join(_output_dir,"{sample}.fw.scaled.bw"), sample=_files_R1 ),
|
25
|
+
expand( os.path.join(_output_dir,"{sample}.rv.scaled.bw"), sample=_files_R1 ),
|
26
|
+
expand( os.path.join(_output_dir,"{sample}." + _BinSize + ".add.bdg"),sample=_files_R1 ),
|
27
|
+
elif _normalize == "False":
|
28
|
+
rule all:
|
29
|
+
input:
|
30
|
+
expand( os.path.join(_output_dir,"{sample}.fw.bed"), sample=_files_R1 ),
|
31
|
+
expand( os.path.join(_output_dir,"{sample}.rv.bed"), sample=_files_R1 ),
|
32
|
+
expand( os.path.join(_output_dir,"{sample}.fw.raw.bw"), sample=_files_R1 ),
|
33
|
+
expand( os.path.join(_output_dir,"{sample}.rv.raw.bw"), sample=_files_R1 ),
|
34
|
+
else:
|
35
|
+
raise ValueError('Please provide "True" or "False" for "--normalize" when running offtracker_config.py')
|
36
|
+
|
37
|
+
|
38
|
+
rule chromap:
|
39
|
+
input:
|
40
|
+
R1=lambda w: _files_R1[w.sample],
|
41
|
+
R2=lambda w: _files_R2[w.sample]
|
42
|
+
threads:
|
43
|
+
_threads
|
44
|
+
params:
|
45
|
+
index=config["index"],
|
46
|
+
fasta=config["fasta"]
|
47
|
+
output:
|
48
|
+
temp(os.path.join(_output_dir,"{sample}.chromapx.bed"))
|
49
|
+
shell:
|
50
|
+
"""
|
51
|
+
chromap -l 3000 --low-mem --BED --remove-pcr-duplicates \
|
52
|
+
--min-read-length 10 --allocate-multi-mappings \
|
53
|
+
-x {params.index} -r {params.fasta} -t {threads} -1 {input.R1} -2 {input.R2} -o {output}
|
54
|
+
"""
|
55
|
+
|
56
|
+
if config["blacklist"] != 'none':
|
57
|
+
rule remove_blacklist:
|
58
|
+
input:
|
59
|
+
os.path.join(_output_dir,"{sample}.chromapx.bed")
|
60
|
+
threads:
|
61
|
+
_threads
|
62
|
+
params:
|
63
|
+
blacklist=config["blacklist"]
|
64
|
+
output:
|
65
|
+
temp(os.path.join(_output_dir,"{sample}.filtered.bed"))
|
66
|
+
shell:
|
67
|
+
"bedtools intersect -a {input} -b {params.blacklist} -v > {output}"
|
68
|
+
|
69
|
+
rule bed2fr:
|
70
|
+
input:
|
71
|
+
os.path.join(_output_dir,"{sample}.filtered.bed")
|
72
|
+
threads:
|
73
|
+
_threads
|
74
|
+
params:
|
75
|
+
dir_script=config["utility_dir"],
|
76
|
+
ignore_chr=config["ignore_chr"],
|
77
|
+
output:
|
78
|
+
fw=os.path.join(_output_dir,"{sample}.fw.bed"),
|
79
|
+
rv=os.path.join(_output_dir,"{sample}.rv.bed")
|
80
|
+
shell:
|
81
|
+
"python {params.dir_script}/1.1_bed2fr.py -b {input} {params.ignore_chr}"
|
82
|
+
else:
|
83
|
+
rule bed2fr:
|
84
|
+
input:
|
85
|
+
os.path.join(_output_dir,"{sample}.chromapx.bed")
|
86
|
+
threads:
|
87
|
+
_threads
|
88
|
+
params:
|
89
|
+
dir_script=config["utility_dir"],
|
90
|
+
ignore_chr=config["ignore_chr"],
|
91
|
+
output:
|
92
|
+
fw=os.path.join(_output_dir,"{sample}.fw.bed"),
|
93
|
+
rv=os.path.join(_output_dir,"{sample}.rv.bed")
|
94
|
+
shell:
|
95
|
+
"python {params.dir_script}/1.1_bed2fr.py -b {input} {params.ignore_chr}"
|
96
|
+
|
97
|
+
rule bed2bdg_fw:
|
98
|
+
input:
|
99
|
+
os.path.join(_output_dir,"{sample}.fw.bed")
|
100
|
+
threads:
|
101
|
+
_threads
|
102
|
+
params:
|
103
|
+
gl=config["genomelen"]
|
104
|
+
output:
|
105
|
+
temp(os.path.join(_output_dir,"{sample}.fw.bdg"))
|
106
|
+
shell:
|
107
|
+
"bedtools genomecov -bg -i {input} -g {params.gl} > {output}"
|
108
|
+
|
109
|
+
rule bed2bdg_rv:
|
110
|
+
input:
|
111
|
+
os.path.join(_output_dir,"{sample}.rv.bed")
|
112
|
+
threads:
|
113
|
+
_threads
|
114
|
+
params:
|
115
|
+
gl=config["genomelen"]
|
116
|
+
output:
|
117
|
+
temp(os.path.join(_output_dir,"{sample}.rv.bdg"))
|
118
|
+
shell:
|
119
|
+
"bedtools genomecov -bg -i {input} -g {params.gl} > {output}"
|
120
|
+
|
121
|
+
rule bdg_sort_fw:
|
122
|
+
input:
|
123
|
+
fw=os.path.join(_output_dir,"{sample}.fw.bdg")
|
124
|
+
threads:
|
125
|
+
_threads
|
126
|
+
output:
|
127
|
+
temp(os.path.join(_output_dir,"{sample}.fw.sorted.bdg"))
|
128
|
+
shell:
|
129
|
+
"bedtools sort -i {input.fw} > {output}"
|
130
|
+
|
131
|
+
rule bdg_sort_rv:
|
132
|
+
input:
|
133
|
+
rv=os.path.join(_output_dir,"{sample}.rv.bdg")
|
134
|
+
threads:
|
135
|
+
_threads
|
136
|
+
output:
|
137
|
+
temp(os.path.join(_output_dir,"{sample}.rv.sorted.bdg"))
|
138
|
+
shell:
|
139
|
+
"bedtools sort -i {input.rv} > {output}"
|
140
|
+
|
141
|
+
if _normalize == "True":
|
142
|
+
rule bdg_normalize_fw:
|
143
|
+
input:
|
144
|
+
bdg=os.path.join(_output_dir,"{sample}.fw.sorted.bdg"),
|
145
|
+
bed=os.path.join(_output_dir,"{sample}.fw.bed")
|
146
|
+
threads:
|
147
|
+
_threads
|
148
|
+
params:
|
149
|
+
dir_script=config["utility_dir"]
|
150
|
+
output:
|
151
|
+
temp(os.path.join(_output_dir,"{sample}.fw.scaled.bdg"))
|
152
|
+
shell:
|
153
|
+
"python {params.dir_script}/1.3_bdg_normalize_v4.0.py --bdg {input.bdg} --bed {input.bed}"
|
154
|
+
|
155
|
+
rule bdg_normalize_rv:
|
156
|
+
input:
|
157
|
+
bdg=os.path.join(_output_dir,"{sample}.rv.sorted.bdg"),
|
158
|
+
bed=os.path.join(_output_dir,"{sample}.rv.bed")
|
159
|
+
threads:
|
160
|
+
_threads
|
161
|
+
params:
|
162
|
+
dir_script=config["utility_dir"]
|
163
|
+
output:
|
164
|
+
temp(os.path.join(_output_dir,"{sample}.rv.scaled.bdg"))
|
165
|
+
shell:
|
166
|
+
"python {params.dir_script}/1.3_bdg_normalize_v4.0.py --bdg {input.bdg} --bed {input.bed}"
|
167
|
+
|
168
|
+
rule bdg2bw_fw:
|
169
|
+
input:
|
170
|
+
os.path.join(_output_dir,"{sample}.fw.scaled.bdg")
|
171
|
+
threads:
|
172
|
+
_threads
|
173
|
+
params:
|
174
|
+
gl=config["genomelen"],
|
175
|
+
dir_script=config["utility_dir"]
|
176
|
+
output:
|
177
|
+
os.path.join(_output_dir,"{sample}.fw.scaled.bw")
|
178
|
+
shell:
|
179
|
+
"{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
|
180
|
+
|
181
|
+
rule bdg2bw_rv:
|
182
|
+
input:
|
183
|
+
os.path.join(_output_dir,"{sample}.rv.scaled.bdg")
|
184
|
+
threads:
|
185
|
+
_threads
|
186
|
+
params:
|
187
|
+
gl=config["genomelen"],
|
188
|
+
dir_script=config["utility_dir"]
|
189
|
+
output:
|
190
|
+
os.path.join(_output_dir,"{sample}.rv.scaled.bw")
|
191
|
+
shell:
|
192
|
+
"{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
|
193
|
+
|
194
|
+
rule bwAdd:
|
195
|
+
input:
|
196
|
+
fw=os.path.join(_output_dir,"{sample}.fw.scaled.bw"),
|
197
|
+
rv=os.path.join(_output_dir,"{sample}.rv.scaled.bw")
|
198
|
+
threads:
|
199
|
+
_threads
|
200
|
+
output:
|
201
|
+
os.path.join(_output_dir,"{sample}." + _BinSize + ".add.bdg")
|
202
|
+
shell:
|
203
|
+
"""
|
204
|
+
bigwigCompare --binSize {_BinSize} -p {threads} --verbose -o {output} \
|
205
|
+
--outFileFormat bedgraph --fixedStep \
|
206
|
+
--bigwig1 {input.fw} \
|
207
|
+
--bigwig2 {input.rv} \
|
208
|
+
--operation add
|
209
|
+
"""
|
210
|
+
else:
|
211
|
+
rule bdg_reverse_rv:
|
212
|
+
input:
|
213
|
+
os.path.join(_output_dir,"{sample}.rv.sorted.bdg")
|
214
|
+
threads:
|
215
|
+
_threads
|
216
|
+
output:
|
217
|
+
temp(os.path.join(_output_dir,"{sample}.rv.sorted_r.bdg"))
|
218
|
+
shell:
|
219
|
+
"awk -F '\t' -v OFS='\t' '{{$4=-$4; print}}' {input} > {output}"
|
220
|
+
|
221
|
+
rule bdg2bw_fw:
|
222
|
+
input:
|
223
|
+
os.path.join(_output_dir,"{sample}.fw.sorted.bdg")
|
224
|
+
threads:
|
225
|
+
_threads
|
226
|
+
params:
|
227
|
+
gl=config["genomelen"],
|
228
|
+
dir_script=config["utility_dir"]
|
229
|
+
output:
|
230
|
+
os.path.join(_output_dir,"{sample}.fw.raw.bw")
|
231
|
+
shell:
|
232
|
+
"{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
|
233
|
+
|
234
|
+
rule bdg2bw_rv:
|
235
|
+
input:
|
236
|
+
os.path.join(_output_dir,"{sample}.rv.sorted_r.bdg")
|
237
|
+
threads:
|
238
|
+
_threads
|
239
|
+
params:
|
240
|
+
gl=config["genomelen"],
|
241
|
+
dir_script=config["utility_dir"]
|
242
|
+
output:
|
243
|
+
os.path.join(_output_dir,"{sample}.rv.raw.bw")
|
244
|
+
shell:
|
245
|
+
"{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
|
246
|
+
|
247
|
+
|
248
|
+
|
249
|
+
|
@@ -8,19 +8,21 @@ parser.description='这算一个小彩蛋'
|
|
8
8
|
# 2022.10.21. v3.0: 文件名长度 chromap -> filtered
|
9
9
|
# 2022.10.26. v4.0: f,r 改成 fw,rv
|
10
10
|
# 2022.01.11. v4.5: 只取 common chromosomes (chr1-chr22, chrX, chrY, chrM)
|
11
|
+
# 2025.06.05. v5.0: add the ignore_chr option; by default only common chromosomes are kept
|
11
12
|
|
12
13
|
# Single-file processing script, meant to be used together with snakemake
|
13
14
|
|
14
15
|
parser.add_argument("-b", "--bed", type=str, metavar="dir_bed" , required=True, help="dir of bed file")
|
16
|
+
parser.add_argument('--ignore_chr', action='store_true', help='If not set, only chr1-chr22, chrX, chrY, chrM will be analyzed.')
|
15
17
|
|
16
18
|
args = parser.parse_args()
|
17
19
|
|
18
20
|
bed_file = pd.read_csv( args.bed, sep='\t', header=None)
|
19
21
|
|
20
|
-
|
21
|
-
common_chr = pd.
|
22
|
-
|
23
|
-
bed_file = bed_file[bed_file[0].isin(common_chr)]
|
22
|
+
if not args.ignore_chr:
|
23
|
+
common_chr = pd.Series(['chr' + str(n) for n in range(1, 23)])  # autosome names chr1 .. chr22
|
24
|
+
common_chr = pd.concat([common_chr, pd.Series(['chrX','chrY','chrM'])]).to_numpy()
|
25
|
+
bed_file = bed_file[bed_file[0].isin(common_chr)]
|
24
26
|
|
25
27
|
bed_f = bed_file[bed_file[5]=='+']
|
26
28
|
bed_r = bed_file[bed_file[5]=='-']
|
@@ -1,10 +1,10 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: offtracker
|
3
|
-
Version: 2.
|
3
|
+
Version: 2.10.0
|
4
4
|
Summary: Tracking-seq data analysis
|
5
5
|
Home-page: https://github.com/Lan-lab/offtracker
|
6
6
|
Author: Runda Xu
|
7
|
-
Author-email:
|
7
|
+
Author-email: xrd18@tsinghua.org.cn
|
8
8
|
Requires-Python: >=3.6.0
|
9
9
|
Description-Content-Type: text/markdown
|
10
10
|
License-File: LICENSE.txt
|
@@ -22,9 +22,10 @@ OFF-TRACKER is an end to end pipeline of Tracking-seq data analysis for detectin
|
|
22
22
|
## Dependency
|
23
23
|
|
24
24
|
```bash
|
25
|
-
# We recommend creating a new
|
25
|
+
# We recommend creating a new environment using mamba/conda to avoid compatibility problems
|
26
26
|
# If you don't use mamba, just replace the code with conda
|
27
|
-
|
27
|
+
# Windows systems may not be compatible with pybedtools.
|
28
|
+
mamba create -n offtracker -c bioconda blast snakemake pybedtools chromap
|
28
29
|
```
|
29
30
|
|
30
31
|
|
@@ -58,32 +59,69 @@ chromap -i -r /Your_Path_To_Reference/hg38_genome.fa \
|
|
58
59
|
-o /Your_Path_To_Reference/hg38_genome.chromap.index
|
59
60
|
|
60
61
|
# Generate candidate regions by sgRNA sequence (need once for each genome and sgRNA)
|
61
|
-
# --name:
|
62
|
+
# --name: a user-defined name of the sgRNA, which will be used in the following analysis.
|
62
63
|
offtracker_candidates.py -t 8 -g hg38 \
|
63
64
|
-r /Your_Path_To_Reference/hg38_genome.fa \
|
64
65
|
-b /Your_Path_To_Reference/hg38_genome.blastdb \
|
65
66
|
--name 'VEGFA2' --sgrna 'GACCCCCTCCACCCCGCCTC' --pam 'NGG' \
|
66
|
-
-o /
|
67
|
+
-o /Your_Path_To_Candidates_Folder
|
67
68
|
|
68
69
|
```
|
69
70
|
|
71
|
+
|
72
|
+
## Quality control and adapter trimming
|
73
|
+
|
74
|
+
```bash
|
75
|
+
# Generate snakemake config file for quality control and adapter trimming.
|
76
|
+
offtracker_qc.py -t 4 \
|
77
|
+
-f /Your_Path_To_Input_Folder \
|
78
|
+
--subfolder 0
|
79
|
+
|
80
|
+
cd /Your_Path_To_Input_Folder/Trimmed_data
|
81
|
+
snakemake -np # dry run to check whether everything is alright
|
82
|
+
nohup snakemake --cores 16 1>sm_qc.log 2>&1 &
|
83
|
+
|
84
|
+
"""
|
85
|
+
Set “--subfolder 0” if the file structure is like:
|
86
|
+
| - Input_Folder
|
87
|
+
| - sample1_R1.fastq.gz
|
88
|
+
| - sample1_R2.fastq.gz
|
89
|
+
| - sample2_R1.fastq.gz
|
90
|
+
| - sample2_R2.fastq.gz
|
91
|
+
Set “--subfolder 1” if the file structure is like:
|
92
|
+
| - Input_Folder
|
93
|
+
| - Sample1_Folder
|
94
|
+
| - sample1_R1.fastq.gz
|
95
|
+
| - sample1_R2.fastq.gz
|
96
|
+
| - Sample2_Folder
|
97
|
+
| - sample2_R1.fastq.gz
|
98
|
+
| - sample2_R2.fastq.gz
|
99
|
+
|
100
|
+
The script “offtracker_qc.py” will create a “Trimmed_data” folder under /Your_Path_To_Input_Folder.
|
101
|
+
If “-o /Your_Path_To_Output” is set, the output will be redirected to /Your_Path_To_Output.
|
102
|
+
"""
|
103
|
+
```
|
104
|
+
|
70
105
|
## Strand-specific mapping of Tracking-seq data
|
71
106
|
|
72
107
|
```bash
|
73
|
-
|
74
|
-
#
|
75
|
-
# if -o is not set, the output will be in the same folder as the fastq files
|
108
|
+
|
109
|
+
# Generate snakemake config file for mapping
|
110
|
+
# Results will be generated in /Your_Path_To_Output. If -o is not set, the output will be in the same folder as the fastq files.
|
76
111
|
offtracker_config.py -t 8 -g hg38 --blacklist hg38 \
|
77
112
|
-r /Your_Path_To_Reference/hg38_genome.fa \
|
78
113
|
-i /Your_Path_To_Reference/hg38_genome.chromap.index \
|
79
|
-
-f /
|
114
|
+
-f /Your_Path_To_Trimmed_Data \
|
80
115
|
-o /Your_Path_To_Output \
|
81
116
|
--subfolder 0
|
82
117
|
|
118
|
+
# Warning: The folder name must not contain "fastq" or "fq"; otherwise the program may treat the folder as a fastq file
|
119
|
+
# This problem may be fixed in the future
|
120
|
+
|
83
121
|
# Run the snakemake program
|
84
122
|
cd /Your_Path_To_Fastq
|
85
123
|
snakemake -np # dry run
|
86
|
-
nohup snakemake --cores 16 1>
|
124
|
+
nohup snakemake --cores 16 1>sm_mapping.log 2>sm_mapping.err &
|
87
125
|
|
88
126
|
## about cores
|
89
127
|
# --cores of snakemake must be larger than -t of offtracker_config.py
|
@@ -98,7 +136,7 @@ nohup snakemake --cores 16 1>snakemake.log 2>snakemake.err &
|
|
98
136
|
## Analyzing the genome-wide off-target sites
|
99
137
|
|
100
138
|
```bash
|
101
|
-
# In this part, multiple samples in the same condition can be analyzed in a single run by pattern
|
139
|
+
# In this part, multiple samples in the same condition can be analyzed in a single run by pattern recognition of sample names
|
102
140
|
|
103
141
|
offtracker_analysis.py -g hg38 --name "VEGFA2" \
|
104
142
|
--exp 'Cas9_VEGFA2' \
|
@@ -127,19 +165,18 @@ offtracker_plot.py --result Your_Offtracker_Result_CSV \
|
|
127
165
|
--sgrna 'GACCCCCTCCACCCCGCCTC' --pam 'NGG'
|
128
166
|
|
129
167
|
# The default output is a pdf file with Offtracker_result_{outname}.pdf
|
130
|
-
#
|
168
|
+
# Assigning a specific output file with another suffix can change the format. e.g., "--output Offtracker_plot.png" will generate a png file.
|
131
169
|
# The orange dash line indicates the empirical threshold of Track score = 2
|
132
170
|
# Empirically, the off-target sites with Track score < 2 are less likely to be real off-target sites.
|
133
171
|
```
|
134
172
|
|
135
173
|
|
136
|
-
## Note1
|
174
|
+
## Note1, when not using hg38 or mm10
|
137
175
|
|
138
|
-
The default setting only includes chr1-chr22, chrX, chrY, and chrM.
|
176
|
+
The default setting only includes chr1-chr22, chrX, chrY, and chrM. (only suitable for human and mouse) \
|
177
|
+
If you are using reference genomes without "chr" at the beginning, or want to analyze all chromosomes or other species, you can set "--ignore_chr" when running offtracker_config.py to skip chromosome filter.
|
139
178
|
|
140
|
-
Currently, this software is only ready-to-use for mm10 and hg38. For any other genome, e.g., hg19, please add genome size file named "hg19.chrom.sizes" to .\offtracker\
|
141
|
-
|
142
|
-
If you have a requirement for species other than human/mouse, please post an issue.
|
179
|
+
Currently, this software is only ready-to-use for mm10 and hg38. For any other genome, e.g., hg19, please add a genome size file named "hg19.chrom.sizes" to .\offtracker\utility. Besides, add "--blacklist none" or "--blacklist Your_Blacklist" (e.g., ENCODE blacklist) when running offtracker_config.py, because we only include blacklists for mm10 and hg38.
|
143
180
|
|
144
181
|
## Note2
|
145
182
|
|
@@ -172,6 +209,7 @@ These files can be visualized in genome browser like IGV:
|
|
172
209
|
|
173
210
|

|
174
211
|
|
212
|
+
The signal (coverage) for each sample is normalized to 1e7/total_reads. As only reads mapping to chr6 were extracted in the example data, the signal range is much higher than that of the whole genome samples.
|
175
213
|
|
176
214
|
## Whole genome off-target analysis
|
177
215
|
|
@@ -183,7 +221,13 @@ After that, you can visualize the off-target sites with their genomic sequence (
|
|
183
221
|
|
184
222
|
# Citation
|
185
223
|
|
224
|
+
If you use Tracking-seq or OFF-TRACKER in your research, please cite the following paper:
|
225
|
+
|
226
|
+
Zhu, M., Xu, R., Yuan, J., Wang, J. et al. Tracking-seq reveals the heterogeneity of off-target effects in CRISPR–Cas9-mediated genome editing. Nat Biotechnol (2024). https://doi.org/10.1038/s41587-024-02307-y
|
186
227
|
|
228
|
+
The signal visualization of .bw file here was generated by the Integrative Genomics Viewer (IGV) software. The signal visualization in the Tracking-seq article above was generated by either IGV or pyGenomeTracks:
|
187
229
|
|
230
|
+
Robinson, J., Thorvaldsdóttir, H., Winckler, W. et al. Integrative genomics viewer. Nat Biotechnol 29, 24–26 (2011). https://doi.org/10.1038/nbt.1754
|
188
231
|
|
232
|
+
Lopez-Delisle L, Rabbani L, Wolff J, Bhardwaj V, Backofen R, Grüning B, Ramírez F, Manke T. pyGenomeTracks: reproducible plots for multivariate genomic data sets. Bioinformatics. 2020 Aug 3:btaa692. doi: 10.1093/bioinformatics/btaa692.
|
189
233
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
LICENSE.txt
|
2
|
+
MANIFEST.in
|
3
|
+
README.md
|
4
|
+
setup.py
|
5
|
+
offtracker/X_offplot.py
|
6
|
+
offtracker/X_offtracker.py
|
7
|
+
offtracker/X_sequence.py
|
8
|
+
offtracker/__init__.py
|
9
|
+
offtracker/_version.py
|
10
|
+
offtracker.egg-info/PKG-INFO
|
11
|
+
offtracker.egg-info/SOURCES.txt
|
12
|
+
offtracker.egg-info/dependency_links.txt
|
13
|
+
offtracker.egg-info/requires.txt
|
14
|
+
offtracker.egg-info/top_level.txt
|
15
|
+
offtracker/snakefile/Snakefile_QC.smk
|
16
|
+
offtracker/snakefile/Snakefile_offtracker.smk
|
17
|
+
offtracker/utility/1.1_bed2fr.py
|
18
|
+
offtracker/utility/1.3_bdg_normalize_v4.0.py
|
19
|
+
offtracker/utility/bedGraphToBigWig
|
20
|
+
offtracker/utility/hg38.chrom.sizes
|
21
|
+
offtracker/utility/mm10.chrom.sizes
|
22
|
+
offtracker/utility/offtracker_blacklist_hg38.merged.bed
|
23
|
+
offtracker/utility/offtracker_blacklist_mm10.merged.bed
|
24
|
+
scripts/offtracker_analysis.py
|
25
|
+
scripts/offtracker_candidates.py
|
26
|
+
scripts/offtracker_config.py
|
27
|
+
scripts/offtracker_plot.py
|
28
|
+
scripts/offtracker_qc.py
|
@@ -27,6 +27,7 @@ def main():
|
|
27
27
|
parser.add_argument('--exp' , type=str, default='all', nargs='+', help='A substring mark in the name of experimental samples. The default is to use all samples other than control' )
|
28
28
|
parser.add_argument('--control' , type=str, default='none', nargs='+', help='A substring mark in the name of control samples. The default is no control. "others" for all samples other than --exp.' )
|
29
29
|
# Parsed as float: the FDR threshold is fractional (default 0.05), which int() cannot parse from the command line.
parser.add_argument('--fdr' , type=float, default=0.05, help='FDR threshold for the final result. Default is 0.05.')
|
30
|
+
# Parsed as float so that non-integer Track score thresholds (e.g. 1.5) are accepted; integer input still works.
parser.add_argument('--score' , type=float, default=2, help='Track score threshold for the final result. Default is 2.')
|
30
31
|
parser.add_argument('--smooth' , type=int, default=1, help='Smooth strength for the signal.')
|
31
32
|
parser.add_argument('--window' , type=int, default=3, help='Window size for smoothing the signal.')
|
32
33
|
parser.add_argument('--binsize' , type=int, default=100, help='Window size for smoothing the signal.')
|
@@ -42,6 +43,7 @@ def main():
|
|
42
43
|
parser.add_argument('--overwrite' , action='store_true', help='Whether to overwrite existed dataframes.' )
|
43
44
|
parser.add_argument('--clean' , action='store_true', help='Whether to remove temp files')
|
44
45
|
|
46
|
+
|
45
47
|
args = parser.parse_args()
|
46
48
|
|
47
49
|
print(f'Runing offtracker verision: {offtracker.__version__}')
|
@@ -51,6 +53,7 @@ def main():
|
|
51
53
|
pattern_exp = args.exp
|
52
54
|
pattern_ctr = args.control
|
53
55
|
fdr_thresh = args.fdr
|
56
|
+
score_thresh = args.score
|
54
57
|
binsize = args.binsize
|
55
58
|
flank_max = args.flank_max
|
56
59
|
flank_regions = args.flank_regions
|
@@ -95,6 +98,8 @@ def main():
|
|
95
98
|
all_sample_files.extend( bdg_files )
|
96
99
|
all_sample_files = pd.Series(all_sample_files)
|
97
100
|
all_sample_names = pd.Series(all_sample_names)
|
101
|
+
print('all sample names in the folders:')
|
102
|
+
print(all_sample_names)
|
98
103
|
print('your string pattern for experimental groups: ', pattern_exp)
|
99
104
|
ctr_samples = []
|
100
105
|
if pattern_ctr == 'none':
|
@@ -341,14 +346,16 @@ def main():
|
|
341
346
|
print('mean_score:{:.3f};std:{:.3f}'.format(mu,std))
|
342
347
|
# pv and fdr
|
343
348
|
df_result['pv'] = df_result[f'log2_track_score'].apply( lambda x: norm.sf(x,loc=mu,scale=std) )
|
344
|
-
df_result['pv'].clip(lower=1e-320
|
349
|
+
df_result['pv'] = df_result['pv'].clip(lower=1e-320)
|
345
350
|
df_result['fdr'] = offtracker.fdr(df_result['pv'])
|
346
351
|
df_result['rank'] = range(1,len(df_result)+1)
|
347
352
|
df_result.to_csv(output)
|
348
353
|
# 2024.06.03. in case the fdr<=fdr_thresh filter drops sites with track_score>=2
|
349
354
|
bool_fdr = df_result['fdr']<=fdr_thresh
|
350
|
-
bool_score = df_result['track_score']>=
|
351
|
-
|
355
|
+
bool_score = df_result['track_score']>=score_thresh
|
356
|
+
# 2025.06.05. BE may produce a single-sided signal, which makes track_score negative; keep those sites as well
|
357
|
+
bool_neg_score = df_result['track_score']<0
|
358
|
+
df_output = df_result[bool_fdr|bool_score|bool_neg_score].copy()
|
352
359
|
if pattern_ctr != 'none':
|
353
360
|
df_output = df_output[['target_location', 'best_strand','best_target','deletion','insertion','mismatch',
|
354
361
|
'exp_L_length', 'exp_R_length','ctr_L_length','ctr_R_length','L_length','R_length','signal_length',
|