offtracker 2.7.10__zip → 2.10.0__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. {offtracker-2.7.10/offtracker.egg-info → offtracker-2.10.0}/PKG-INFO +62 -18
  2. {offtracker-2.7.10 → offtracker-2.10.0}/README.md +62 -18
  3. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/X_offplot.py +13 -2
  4. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/X_sequence.py +113 -7
  5. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/_version.py +8 -2
  6. offtracker-2.10.0/offtracker/snakefile/Snakefile_QC.smk +66 -0
  7. offtracker-2.10.0/offtracker/snakefile/Snakefile_offtracker.smk +249 -0
  8. offtracker-2.7.10/offtracker/mapping/1.1_bed2fr_v4.5.py → offtracker-2.10.0/offtracker/utility/1.1_bed2fr.py +6 -4
  9. {offtracker-2.7.10 → offtracker-2.10.0/offtracker.egg-info}/PKG-INFO +62 -18
  10. offtracker-2.10.0/offtracker.egg-info/SOURCES.txt +28 -0
  11. {offtracker-2.7.10 → offtracker-2.10.0}/scripts/offtracker_analysis.py +10 -3
  12. offtracker-2.10.0/scripts/offtracker_candidates.py +318 -0
  13. {offtracker-2.7.10 → offtracker-2.10.0}/scripts/offtracker_config.py +28 -44
  14. offtracker-2.10.0/scripts/offtracker_qc.py +62 -0
  15. {offtracker-2.7.10 → offtracker-2.10.0}/setup.py +5 -4
  16. offtracker-2.7.10/offtracker/mapping/Snakefile_offtracker +0 -245
  17. offtracker-2.7.10/offtracker.egg-info/SOURCES.txt +0 -26
  18. offtracker-2.7.10/scripts/offtracker_candidates.py +0 -307
  19. {offtracker-2.7.10 → offtracker-2.10.0}/LICENSE.txt +0 -0
  20. {offtracker-2.7.10 → offtracker-2.10.0}/MANIFEST.in +0 -0
  21. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/X_offtracker.py +0 -0
  22. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker/__init__.py +0 -0
  23. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/1.3_bdg_normalize_v4.0.py +0 -0
  24. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/bedGraphToBigWig +0 -0
  25. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/hg38.chrom.sizes +0 -0
  26. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/mm10.chrom.sizes +0 -0
  27. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/offtracker_blacklist_hg38.merged.bed +0 -0
  28. {offtracker-2.7.10/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/offtracker_blacklist_mm10.merged.bed +0 -0
  29. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker.egg-info/dependency_links.txt +0 -0
  30. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker.egg-info/requires.txt +0 -0
  31. {offtracker-2.7.10 → offtracker-2.10.0}/offtracker.egg-info/top_level.txt +0 -0
  32. {offtracker-2.7.10 → offtracker-2.10.0}/scripts/offtracker_plot.py +0 -0
  33. {offtracker-2.7.10 → offtracker-2.10.0}/setup.cfg +0 -0
@@ -0,0 +1,249 @@
1
+ # 2023.08.11. adding an option for not normalizing the bw file
2
+ # 2024.01.23. add --fixedStep to bigwigCompare for not merging neighbouring bins with equal values.
3
+ # 2025.05.22. refine the structure
4
+
5
+ configfile: "config.yaml"
6
+
7
+ # # fastq inputs
8
+ _files_R1 = config['files_R1'] # dict keyed by sample name
9
+ _files_R2 = config['files_R2'] # dict keyed by sample name
10
+ # # run parameters
11
+ _output_dir = config["output_dir"]
12
+ _threads = config['thread'] # bound as `_threads` because every rule's `threads:` directive references that name
13
+ _BinSize = str(config["binsize"]) # kept as a string so it can be concatenated into output file names
14
+ _normalize = config["normalize"] # compared against the strings "True"/"False" when selecting rules
15
+
16
+
17
+ import os
18
+
19
+ if _normalize == "True":
20
+ rule all:
21
+ input:
22
+ expand( os.path.join(_output_dir,"{sample}.fw.bed"), sample=_files_R1 ),
23
+ expand( os.path.join(_output_dir,"{sample}.rv.bed"), sample=_files_R1 ),
24
+ expand( os.path.join(_output_dir,"{sample}.fw.scaled.bw"), sample=_files_R1 ),
25
+ expand( os.path.join(_output_dir,"{sample}.rv.scaled.bw"), sample=_files_R1 ),
26
+ expand( os.path.join(_output_dir,"{sample}." + _BinSize + ".add.bdg"),sample=_files_R1 ),
27
+ elif _normalize == "False":
28
+ rule all:
29
+ input:
30
+ expand( os.path.join(_output_dir,"{sample}.fw.bed"), sample=_files_R1 ),
31
+ expand( os.path.join(_output_dir,"{sample}.rv.bed"), sample=_files_R1 ),
32
+ expand( os.path.join(_output_dir,"{sample}.fw.raw.bw"), sample=_files_R1 ),
33
+ expand( os.path.join(_output_dir,"{sample}.rv.raw.bw"), sample=_files_R1 ),
34
+ else:
35
+ raise ValueError('Please provide "True" or "False" for "--normalize" when running offtracker_config.py')
36
+
37
+
38
+ rule chromap:
39
+ input:
40
+ R1=lambda w: _files_R1[w.sample],
41
+ R2=lambda w: _files_R2[w.sample]
42
+ threads:
43
+ _threads
44
+ params:
45
+ index=config["index"],
46
+ fasta=config["fasta"]
47
+ output:
48
+ temp(os.path.join(_output_dir,"{sample}.chromapx.bed"))
49
+ shell:
50
+ """
51
+ chromap -l 3000 --low-mem --BED --remove-pcr-duplicates \
52
+ --min-read-length 10 --allocate-multi-mappings \
53
+ -x {params.index} -r {params.fasta} -t {threads} -1 {input.R1} -2 {input.R2} -o {output}
54
+ """
55
+
56
+ if config["blacklist"] != 'none':
57
+ rule remove_blacklist:
58
+ input:
59
+ os.path.join(_output_dir,"{sample}.chromapx.bed")
60
+ threads:
61
+ _threads
62
+ params:
63
+ blacklist=config["blacklist"]
64
+ output:
65
+ temp(os.path.join(_output_dir,"{sample}.filtered.bed"))
66
+ shell:
67
+ "bedtools intersect -a {input} -b {params.blacklist} -v > {output}"
68
+
69
+ rule bed2fr:
70
+ input:
71
+ os.path.join(_output_dir,"{sample}.filtered.bed")
72
+ threads:
73
+ _threads
74
+ params:
75
+ dir_script=config["utility_dir"],
76
+ ignore_chr=config["ignore_chr"],
77
+ output:
78
+ fw=os.path.join(_output_dir,"{sample}.fw.bed"),
79
+ rv=os.path.join(_output_dir,"{sample}.rv.bed")
80
+ shell:
81
+ "python {params.dir_script}/1.1_bed2fr.py -b {input} {params.ignore_chr}"
82
+ else:
83
+ rule bed2fr:
84
+ input:
85
+ os.path.join(_output_dir,"{sample}.chromapx.bed")
86
+ threads:
87
+ _threads
88
+ params:
89
+ dir_script=config["utility_dir"],
90
+ ignore_chr=config["ignore_chr"],
91
+ output:
92
+ fw=os.path.join(_output_dir,"{sample}.fw.bed"),
93
+ rv=os.path.join(_output_dir,"{sample}.rv.bed")
94
+ shell:
95
+ "python {params.dir_script}/1.1_bed2fr.py -b {input} {params.ignore_chr}"
96
+
97
+ rule bed2bdg_fw:
98
+ input:
99
+ os.path.join(_output_dir,"{sample}.fw.bed")
100
+ threads:
101
+ _threads
102
+ params:
103
+ gl=config["genomelen"]
104
+ output:
105
+ temp(os.path.join(_output_dir,"{sample}.fw.bdg"))
106
+ shell:
107
+ "bedtools genomecov -bg -i {input} -g {params.gl} > {output}"
108
+
109
+ rule bed2bdg_rv:
110
+ input:
111
+ os.path.join(_output_dir,"{sample}.rv.bed")
112
+ threads:
113
+ _threads
114
+ params:
115
+ gl=config["genomelen"]
116
+ output:
117
+ temp(os.path.join(_output_dir,"{sample}.rv.bdg"))
118
+ shell:
119
+ "bedtools genomecov -bg -i {input} -g {params.gl} > {output}"
120
+
121
+ rule bdg_sort_fw:
122
+ input:
123
+ fw=os.path.join(_output_dir,"{sample}.fw.bdg")
124
+ threads:
125
+ _threads
126
+ output:
127
+ temp(os.path.join(_output_dir,"{sample}.fw.sorted.bdg"))
128
+ shell:
129
+ "bedtools sort -i {input.fw} > {output}"
130
+
131
+ rule bdg_sort_rv:
132
+ input:
133
+ rv=os.path.join(_output_dir,"{sample}.rv.bdg")
134
+ threads:
135
+ _threads
136
+ output:
137
+ temp(os.path.join(_output_dir,"{sample}.rv.sorted.bdg"))
138
+ shell:
139
+ "bedtools sort -i {input.rv} > {output}"
140
+
141
+ if _normalize == "True":
142
+ rule bdg_normalize_fw:
143
+ input:
144
+ bdg=os.path.join(_output_dir,"{sample}.fw.sorted.bdg"),
145
+ bed=os.path.join(_output_dir,"{sample}.fw.bed")
146
+ threads:
147
+ _threads
148
+ params:
149
+ dir_script=config["utility_dir"]
150
+ output:
151
+ temp(os.path.join(_output_dir,"{sample}.fw.scaled.bdg"))
152
+ shell:
153
+ "python {params.dir_script}/1.3_bdg_normalize_v4.0.py --bdg {input.bdg} --bed {input.bed}"
154
+
155
+ rule bdg_normalize_rv:
156
+ input:
157
+ bdg=os.path.join(_output_dir,"{sample}.rv.sorted.bdg"),
158
+ bed=os.path.join(_output_dir,"{sample}.rv.bed")
159
+ threads:
160
+ _threads
161
+ params:
162
+ dir_script=config["utility_dir"]
163
+ output:
164
+ temp(os.path.join(_output_dir,"{sample}.rv.scaled.bdg"))
165
+ shell:
166
+ "python {params.dir_script}/1.3_bdg_normalize_v4.0.py --bdg {input.bdg} --bed {input.bed}"
167
+
168
+ rule bdg2bw_fw:
169
+ input:
170
+ os.path.join(_output_dir,"{sample}.fw.scaled.bdg")
171
+ threads:
172
+ _threads
173
+ params:
174
+ gl=config["genomelen"],
175
+ dir_script=config["utility_dir"]
176
+ output:
177
+ os.path.join(_output_dir,"{sample}.fw.scaled.bw")
178
+ shell:
179
+ "{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
180
+
181
+ rule bdg2bw_rv:
182
+ input:
183
+ os.path.join(_output_dir,"{sample}.rv.scaled.bdg")
184
+ threads:
185
+ _threads
186
+ params:
187
+ gl=config["genomelen"],
188
+ dir_script=config["utility_dir"]
189
+ output:
190
+ os.path.join(_output_dir,"{sample}.rv.scaled.bw")
191
+ shell:
192
+ "{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
193
+
194
+ rule bwAdd:
195
+ input:
196
+ fw=os.path.join(_output_dir,"{sample}.fw.scaled.bw"),
197
+ rv=os.path.join(_output_dir,"{sample}.rv.scaled.bw")
198
+ threads:
199
+ _threads
200
+ output:
201
+ os.path.join(_output_dir,"{sample}." + _BinSize + ".add.bdg")
202
+ shell:
203
+ """
204
+ bigwigCompare --binSize {_BinSize} -p {threads} --verbose -o {output} \
205
+ --outFileFormat bedgraph --fixedStep \
206
+ --bigwig1 {input.fw} \
207
+ --bigwig2 {input.rv} \
208
+ --operation add
209
+ """
210
+ else:
211
+ rule bdg_reverse_rv:
212
+ input:
213
+ os.path.join(_output_dir,"{sample}.rv.sorted.bdg")
214
+ threads:
215
+ _threads
216
+ output:
217
+ temp(os.path.join(_output_dir,"{sample}.rv.sorted_r.bdg"))
218
+ shell:
219
+ "awk -F '\t' -v OFS='\t' '{{$4=-$4; print}}' {input} > {output}"
220
+
221
+ rule bdg2bw_fw:
222
+ input:
223
+ os.path.join(_output_dir,"{sample}.fw.sorted.bdg")
224
+ threads:
225
+ _threads
226
+ params:
227
+ gl=config["genomelen"],
228
+ dir_script=config["utility_dir"]
229
+ output:
230
+ os.path.join(_output_dir,"{sample}.fw.raw.bw")
231
+ shell:
232
+ "{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
233
+
234
+ rule bdg2bw_rv:
235
+ input:
236
+ os.path.join(_output_dir,"{sample}.rv.sorted_r.bdg")
237
+ threads:
238
+ _threads
239
+ params:
240
+ gl=config["genomelen"],
241
+ dir_script=config["utility_dir"]
242
+ output:
243
+ os.path.join(_output_dir,"{sample}.rv.raw.bw")
244
+ shell:
245
+ "{params.dir_script}/bedGraphToBigWig {input} {params.gl} {output}"
246
+
247
+
248
+
249
+
@@ -8,19 +8,21 @@ parser.description='这算一个小彩蛋'
8
8
  # 2022.10.21. v3.0: 文件名长度 chromap -> filtered
9
9
  # 2022.10.26. v4.0: f,r 改成 fw,rv
10
10
  # 2022.01.11. v4.5: 只取 common chromosomes (chr1-chr22, chrX, chrY, chrM)
11
+ # 2025.06.05. v5.0: 增加 ignore_chr 选项,默认只取 common chromosomes
11
12
 
12
13
  # 单文件处理脚本,配合snakemake使用
13
14
 
14
15
  parser.add_argument("-b", "--bed", type=str, metavar="dir_bed" , required=True, help="dir of bed file")
16
+ parser.add_argument('--ignore_chr', action='store_true', help='If not set, only chr1-chr22, chrX, chrY, chrM will be analyzed.')
15
17
 
16
18
  args = parser.parse_args()
17
19
 
18
20
  bed_file = pd.read_csv( args.bed, sep='\t', header=None)
19
21
 
20
- common_chr = pd.Series(['chr']*22).str[:] + pd.Series(range(1,23)).astype(str).str[:]
21
- common_chr = pd.concat([common_chr, pd.Series(['chrX','chrY','chrM'])]).to_numpy()
22
-
23
- bed_file = bed_file[bed_file[0].isin(common_chr)]
22
+ if not args.ignore_chr:
23
+ common_chr = pd.Series(['chr']*22).str[:] + pd.Series(range(1,23)).astype(str).str[:]
24
+ common_chr = pd.concat([common_chr, pd.Series(['chrX','chrY','chrM'])]).to_numpy()
25
+ bed_file = bed_file[bed_file[0].isin(common_chr)]
24
26
 
25
27
  bed_f = bed_file[bed_file[5]=='+']
26
28
  bed_r = bed_file[bed_file[5]=='-']
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: offtracker
3
- Version: 2.7.10
3
+ Version: 2.10.0
4
4
  Summary: Tracking-seq data analysis
5
5
  Home-page: https://github.com/Lan-lab/offtracker
6
6
  Author: Runda Xu
7
- Author-email: runda.xu@foxmail.com
7
+ Author-email: xrd18@tsinghua.org.cn
8
8
  Requires-Python: >=3.6.0
9
9
  Description-Content-Type: text/markdown
10
10
  License-File: LICENSE.txt
@@ -22,9 +22,10 @@ OFF-TRACKER is an end to end pipeline of Tracking-seq data analysis for detectin
22
22
  ## Dependency
23
23
 
24
24
  ```bash
25
- # We recommend creating a new enviroment using mamba/conda to avoid compatibility problems
25
+ # We recommend creating a new environment using mamba/conda to avoid compatibility problems
26
26
  # If you don't use mamba, just replace the code with conda
27
- mamba create -n offtracker -c bioconda blast snakemake pybedtools
27
+ # Windows systems may not be compatible with pybedtools.
28
+ mamba create -n offtracker -c bioconda blast snakemake pybedtools chromap
28
29
  ```
29
30
 
30
31
 
@@ -58,32 +59,69 @@ chromap -i -r /Your_Path_To_Reference/hg38_genome.fa \
58
59
  -o /Your_Path_To_Reference/hg38_genome.chromap.index
59
60
 
60
61
  # Generate candidate regions by sgRNA sequence (need once for each genome and sgRNA)
61
- # --name: the name of the sgRNA, which will be used in the following analysis
62
+ # --name: a user-defined name of the sgRNA, which will be used in the following analysis.
62
63
  offtracker_candidates.py -t 8 -g hg38 \
63
64
  -r /Your_Path_To_Reference/hg38_genome.fa \
64
65
  -b /Your_Path_To_Reference/hg38_genome.blastdb \
65
66
  --name 'VEGFA2' --sgrna 'GACCCCCTCCACCCCGCCTC' --pam 'NGG' \
66
- -o /Your_Path_To_Candidates
67
+ -o /Your_Path_To_Candidates_Folder
67
68
 
68
69
  ```
69
70
 
71
+
72
+ ## Quality control and adapter trimming
73
+
74
+ ```bash
75
+ # Generate snakemake config file for quality control and adapter trimming.
76
+ offtracker_qc.py -t 4 \
77
+ -f /Your_Path_To_Input_Folder \
78
+ --subfolder 0
79
+
80
+ cd /Your_Path_To_Input_Folder/Trimmed_data
81
+ snakemake -np # dry run to check whether everything is alright
82
+ nohup snakemake --cores 16 1>sm_qc.log 2>&1 &
83
+
84
+ """
85
+ Set “--subfolder 0” if the file structure is like:
86
+ | - Input_Folder
87
+ | - sample1_R1.fastq.gz
88
+ | - sample1_R2.fastq.gz
89
+ | - sample2_R1.fastq.gz
90
+ | - sample2_R2.fastq.gz
91
+ Set “--subfolder 1” if the file structure is like:
92
+ | - Input_Folder
93
+ | - Sample1_Folder
94
+ | - sample1_R1.fastq.gz
95
+ | - sample1_R2.fastq.gz
96
+ | - Sample2_Folder
97
+ | - sample2_R1.fastq.gz
98
+ | - sample2_R2.fastq.gz
99
+
100
+ The script “offtracker_qc.py” will create a “Trimmed_data” folder under /Your_Path_To_Input_Folder.
101
+ If “-o /Your_Path_To_Output” is set, the output will be redirected to /Your_Path_To_Output.
102
+ """
103
+ ```
104
+
70
105
  ## Strand-specific mapping of Tracking-seq data
71
106
 
72
107
  ```bash
73
- # Generate snakemake config file
74
- # --subfolder: If different samples are in seperate folders, set this to 1
75
- # if -o is not set, the output will be in the same folder as the fastq files
108
+
109
+ # Generate snakemake config file for mapping
110
+ # Results will be generated in /Your_Path_To_Output. If -o is not set, the output will be in the same folder as the fastq files.
76
111
  offtracker_config.py -t 8 -g hg38 --blacklist hg38 \
77
112
  -r /Your_Path_To_Reference/hg38_genome.fa \
78
113
  -i /Your_Path_To_Reference/hg38_genome.chromap.index \
79
- -f /Your_Path_To_Fastq \
114
+ -f /Your_Path_To_Trimmed_Data \
80
115
  -o /Your_Path_To_Output \
81
116
  --subfolder 0
82
117
 
118
+ # Warning: Do not include "fastq" or "fq" in any folder name; otherwise the program may treat the folder as a fastq file.
119
+ # This problem may be fixed in the future
120
+
83
121
  # Run the snakemake program
84
122
  cd /Your_Path_To_Fastq
85
123
  snakemake -np # dry run
86
- nohup snakemake --cores 16 1>snakemake.log 2>snakemake.err &
124
+ nohup snakemake --cores 16 1>sm_mapping.log 2>sm_mapping.err &
87
125
 
88
126
  ## about cores
89
127
  # --cores of snakemake must be larger than -t of offtracker_config.py
@@ -98,7 +136,7 @@ nohup snakemake --cores 16 1>snakemake.log 2>snakemake.err &
98
136
  ## Analyzing the genome-wide off-target sites
99
137
 
100
138
  ```bash
101
- # In this part, multiple samples in the same condition can be analyzed in a single run by pattern recogonization of sample names
139
+ # In this part, multiple samples in the same condition can be analyzed in a single run by pattern recognition of sample names
102
140
 
103
141
  offtracker_analysis.py -g hg38 --name "VEGFA2" \
104
142
  --exp 'Cas9_VEGFA2' \
@@ -127,19 +165,18 @@ offtracker_plot.py --result Your_Offtracker_Result_CSV \
127
165
  --sgrna 'GACCCCCTCCACCCCGCCTC' --pam 'NGG'
128
166
 
129
167
  # The default output is a pdf file with Offtracker_result_{outname}.pdf
130
- # Change the suffix of the output file to change the format (e.g.: .png)
168
+ # To change the output format, specify an output file with a different suffix, e.g., "--output Offtracker_plot.png" will generate a png file.
131
169
  # The orange dash line indicates the empirical threshold of Track score = 2
132
170
  # Empirically, the off-target sites with Track score < 2 are less likely to be real off-target sites.
133
171
  ```
134
172
 
135
173
 
136
- ## Note1
174
+ ## Note1, when not using hg38 or mm10
137
175
 
138
- The default setting only includes chr1-chr22, chrX, chrY, and chrM. Please make sure the reference genome contains "chr" at the beginning.
176
+ The default setting only includes chr1-chr22, chrX, chrY, and chrM. (only suitable for human and mouse) \
177
+ If you are using reference genomes without "chr" at the beginning, or want to analyze all chromosomes or other species, you can set "--ignore_chr" when running offtracker_config.py to skip chromosome filter.
139
178
 
140
- Currently, this software is only ready-to-use for mm10 and hg38. For any other genome, e.g., hg19, please add genome size file named "hg19.chrom.sizes" to .\offtracker\mapping and instal manually. Besides, add "--blacklist none" or "--blacklist Your_Blacklist" (e.g., ENCODE blacklist) when running offtracker_config.py, because we only provide blacklists for mm10 and hg38.
141
-
142
- If you have a requirement for species other than human/mouse, please post an issue.
179
+ Currently, this software is only ready-to-use for mm10 and hg38. For any other genome, e.g., hg19, please add a genome size file named "hg19.chrom.sizes" to .\offtracker\utility. Besides, add "--blacklist none" or "--blacklist Your_Blacklist" (e.g., ENCODE blacklist) when running offtracker_config.py, because we only include blacklists for mm10 and hg38.
143
180
 
144
181
  ## Note2
145
182
 
@@ -172,6 +209,7 @@ These files can be visualized in genome browser like IGV:
172
209
 
173
210
  ![signal](https://github.com/Lan-lab/offtracker/blob/main/example_output/signals_example.png?raw=true)
174
211
 
212
+ The signal (coverage) for each sample is normalized to 1e7/total_reads. As only reads mapping to chr6 were extracted in the example data, the signal range is much higher than that of the whole genome samples.
175
213
 
176
214
  ## Whole genome off-target analysis
177
215
 
@@ -183,7 +221,13 @@ After that, you can visualize the off-target sites with their genomic sequence (
183
221
 
184
222
  # Citation
185
223
 
224
+ If you use Tracking-seq or OFF-TRACKER in your research, please cite the following paper:
225
+
226
+ Zhu, M., Xu, R., Yuan, J., Wang, J. et al. Tracking-seq reveals the heterogeneity of off-target effects in CRISPR–Cas9-mediated genome editing. Nat Biotechnol (2024). https://doi.org/10.1038/s41587-024-02307-y
186
227
 
228
+ The signal visualization of .bw file here was generated by the Integrative Genomics Viewer (IGV) software. The signal visualization in the Tracking-seq article above was generated by either IGV or pyGenomeTracks:
187
229
 
230
+ Robinson, J., Thorvaldsdóttir, H., Winckler, W. et al. Integrative genomics viewer. Nat Biotechnol 29, 24–26 (2011). https://doi.org/10.1038/nbt.1754
188
231
 
232
+ Lopez-Delisle L, Rabbani L, Wolff J, Bhardwaj V, Backofen R, Grüning B, Ramírez F, Manke T. pyGenomeTracks: reproducible plots for multivariate genomic data sets. Bioinformatics. 2020 Aug 3:btaa692. doi: 10.1093/bioinformatics/btaa692.
189
233
 
@@ -0,0 +1,28 @@
1
+ LICENSE.txt
2
+ MANIFEST.in
3
+ README.md
4
+ setup.py
5
+ offtracker/X_offplot.py
6
+ offtracker/X_offtracker.py
7
+ offtracker/X_sequence.py
8
+ offtracker/__init__.py
9
+ offtracker/_version.py
10
+ offtracker.egg-info/PKG-INFO
11
+ offtracker.egg-info/SOURCES.txt
12
+ offtracker.egg-info/dependency_links.txt
13
+ offtracker.egg-info/requires.txt
14
+ offtracker.egg-info/top_level.txt
15
+ offtracker/snakefile/Snakefile_QC.smk
16
+ offtracker/snakefile/Snakefile_offtracker.smk
17
+ offtracker/utility/1.1_bed2fr.py
18
+ offtracker/utility/1.3_bdg_normalize_v4.0.py
19
+ offtracker/utility/bedGraphToBigWig
20
+ offtracker/utility/hg38.chrom.sizes
21
+ offtracker/utility/mm10.chrom.sizes
22
+ offtracker/utility/offtracker_blacklist_hg38.merged.bed
23
+ offtracker/utility/offtracker_blacklist_mm10.merged.bed
24
+ scripts/offtracker_analysis.py
25
+ scripts/offtracker_candidates.py
26
+ scripts/offtracker_config.py
27
+ scripts/offtracker_plot.py
28
+ scripts/offtracker_qc.py
@@ -27,6 +27,7 @@ def main():
27
27
  parser.add_argument('--exp' , type=str, default='all', nargs='+', help='A substring mark in the name of experimental samples. The default is to use all samples other than control' )
28
28
  parser.add_argument('--control' , type=str, default='none', nargs='+', help='A substring mark in the name of control samples. The default is no control. "others" for all samples other than --exp.' )
29
29
  parser.add_argument('--fdr' , type=int, default=0.05, help='FDR threshold for the final result. Default is 0.05.')
30
+ parser.add_argument('--score' , type=int, default=2, help='Track score threshold for the final result. Default is 2.')
30
31
  parser.add_argument('--smooth' , type=int, default=1, help='Smooth strength for the signal.')
31
32
  parser.add_argument('--window' , type=int, default=3, help='Window size for smoothing the signal.')
32
33
  parser.add_argument('--binsize' , type=int, default=100, help='Window size for smoothing the signal.')
@@ -42,6 +43,7 @@ def main():
42
43
  parser.add_argument('--overwrite' , action='store_true', help='Whether to overwrite existed dataframes.' )
43
44
  parser.add_argument('--clean' , action='store_true', help='Whether to remove temp files')
44
45
 
46
+
45
47
  args = parser.parse_args()
46
48
 
47
49
  print(f'Runing offtracker verision: {offtracker.__version__}')
@@ -51,6 +53,7 @@ def main():
51
53
  pattern_exp = args.exp
52
54
  pattern_ctr = args.control
53
55
  fdr_thresh = args.fdr
56
+ score_thresh = args.score
54
57
  binsize = args.binsize
55
58
  flank_max = args.flank_max
56
59
  flank_regions = args.flank_regions
@@ -95,6 +98,8 @@ def main():
95
98
  all_sample_files.extend( bdg_files )
96
99
  all_sample_files = pd.Series(all_sample_files)
97
100
  all_sample_names = pd.Series(all_sample_names)
101
+ print('all sample names in the folders:')
102
+ print(all_sample_names)
98
103
  print('your string pattern for experimental groups: ', pattern_exp)
99
104
  ctr_samples = []
100
105
  if pattern_ctr == 'none':
@@ -341,14 +346,16 @@ def main():
341
346
  print('mean_score:{:.3f};std:{:.3f}'.format(mu,std))
342
347
  # pv and fdr
343
348
  df_result['pv'] = df_result[f'log2_track_score'].apply( lambda x: norm.sf(x,loc=mu,scale=std) )
344
- df_result['pv'].clip(lower=1e-320,inplace=True)
349
+ df_result['pv'] = df_result['pv'].clip(lower=1e-320)
345
350
  df_result['fdr'] = offtracker.fdr(df_result['pv'])
346
351
  df_result['rank'] = range(1,len(df_result)+1)
347
352
  df_result.to_csv(output)
348
353
  # 2024.06.03. 以防 fdr<=fdr_thresh 滤掉了 track_score>=2 的位点
349
354
  bool_fdr = df_result['fdr']<=fdr_thresh
350
- bool_score = df_result['track_score']>=2
351
- df_output = df_result[bool_fdr|bool_score].copy()
355
+ bool_score = df_result['track_score']>=score_thresh
356
+ # 2025.06.05. BE可能会形成单边信号,导致 track_score 为负数,也保留
357
+ bool_neg_score = df_result['track_score']<0
358
+ df_output = df_result[bool_fdr|bool_score|bool_neg_score].copy()
352
359
  if pattern_ctr != 'none':
353
360
  df_output = df_output[['target_location', 'best_strand','best_target','deletion','insertion','mismatch',
354
361
  'exp_L_length', 'exp_R_length','ctr_L_length','ctr_R_length','L_length','R_length','signal_length',