offtracker 2.7.8__zip → 2.10.0__zip
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- offtracker-2.10.0/PKG-INFO +233 -0
- offtracker-2.10.0/README.md +221 -0
- {offtracker-2.7.8 → offtracker-2.10.0}/offtracker/X_offplot.py +37 -8
- {offtracker-2.7.8 → offtracker-2.10.0}/offtracker/X_sequence.py +113 -7
- offtracker-2.10.0/offtracker/_version.py +36 -0
- offtracker-2.10.0/offtracker/snakefile/Snakefile_QC.smk +66 -0
- offtracker-2.10.0/offtracker/snakefile/Snakefile_offtracker.smk +249 -0
- offtracker-2.7.8/offtracker/mapping/1.1_bed2fr_v4.5.py → offtracker-2.10.0/offtracker/utility/1.1_bed2fr.py +6 -4
- offtracker-2.10.0/offtracker.egg-info/PKG-INFO +233 -0
- offtracker-2.10.0/offtracker.egg-info/SOURCES.txt +28 -0
- {offtracker-2.7.8 → offtracker-2.10.0}/scripts/offtracker_analysis.py +20 -5
- offtracker-2.10.0/scripts/offtracker_candidates.py +318 -0
- {offtracker-2.7.8 → offtracker-2.10.0}/scripts/offtracker_config.py +28 -44
- offtracker-2.10.0/scripts/offtracker_plot.py +39 -0
- offtracker-2.10.0/scripts/offtracker_qc.py +62 -0
- {offtracker-2.7.8 → offtracker-2.10.0}/setup.py +8 -4
- offtracker-2.7.8/PKG-INFO +0 -146
- offtracker-2.7.8/README.md +0 -134
- offtracker-2.7.8/offtracker/_version.py +0 -28
- offtracker-2.7.8/offtracker/mapping/Snakefile_offtracker +0 -245
- offtracker-2.7.8/offtracker.egg-info/PKG-INFO +0 -146
- offtracker-2.7.8/offtracker.egg-info/SOURCES.txt +0 -25
- offtracker-2.7.8/scripts/offtracker_candidates.py +0 -307
- {offtracker-2.7.8 → offtracker-2.10.0}/LICENSE.txt +0 -0
- {offtracker-2.7.8 → offtracker-2.10.0}/MANIFEST.in +0 -0
- {offtracker-2.7.8 → offtracker-2.10.0}/offtracker/X_offtracker.py +0 -0
- {offtracker-2.7.8 → offtracker-2.10.0}/offtracker/__init__.py +0 -0
- {offtracker-2.7.8/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/1.3_bdg_normalize_v4.0.py +0 -0
- {offtracker-2.7.8/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/bedGraphToBigWig +0 -0
- {offtracker-2.7.8/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/hg38.chrom.sizes +0 -0
- {offtracker-2.7.8/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/mm10.chrom.sizes +0 -0
- {offtracker-2.7.8/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/offtracker_blacklist_hg38.merged.bed +0 -0
- {offtracker-2.7.8/offtracker/mapping → offtracker-2.10.0/offtracker/utility}/offtracker_blacklist_mm10.merged.bed +0 -0
- {offtracker-2.7.8 → offtracker-2.10.0}/offtracker.egg-info/dependency_links.txt +0 -0
- {offtracker-2.7.8 → offtracker-2.10.0}/offtracker.egg-info/requires.txt +0 -0
- {offtracker-2.7.8 → offtracker-2.10.0}/offtracker.egg-info/top_level.txt +0 -0
- {offtracker-2.7.8 → offtracker-2.10.0}/setup.cfg +0 -0
@@ -1,20 +1,22 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# -*- coding: utf-8 -*-
|
3
3
|
|
4
|
-
# 2023.08.11.
|
4
|
+
# 2023.08.11. adding an option for not normalizing the bw file
|
5
|
+
# 2025.05.22. refine the structure
|
6
|
+
# 2025.06.05. 增加 ignore_chr 选项,默认只取 common chromosomes,用于 1.1_bed2fr.py
|
5
7
|
|
6
8
|
import argparse
|
7
9
|
import os, glob, yaml
|
8
10
|
import pandas as pd
|
9
11
|
import shutil, re
|
10
12
|
import offtracker
|
13
|
+
import offtracker.X_sequence as xseq
|
11
14
|
script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
|
12
|
-
|
13
|
-
os.chmod( os.path.join(script_folder, 'bedGraphToBigWig'), 0o755)
|
15
|
+
utility_dir = os.path.join(script_dir, 'utility')
|
14
16
|
|
15
17
|
###
|
16
18
|
parser = argparse.ArgumentParser()
|
17
|
-
parser.description='Mapping fastq files of
|
19
|
+
parser.description='Mapping fastq files of Tracking-seq.'
|
18
20
|
parser.add_argument('-f','--folder', type=str, required=True, help='Directory of the input folder' )
|
19
21
|
parser.add_argument('-r','--ref' , type=str, required=True, help='The fasta file of reference genome')
|
20
22
|
parser.add_argument('-i','--index' , type=str, required=True, help='The index file of chromap')
|
@@ -25,12 +27,13 @@ parser.add_argument('-t','--thread', type=int, default=4, help='Number of t
|
|
25
27
|
parser.add_argument('--blacklist' , type=str, default='same', help='Blacklist of genome regions in bed format. "none" for no filter')
|
26
28
|
parser.add_argument('--binsize' , type=str, default=100, help='Bin size for calculating bw residue')
|
27
29
|
parser.add_argument('--normalize' , type=str, default='True', help='Whether to normalize the BigWig file. "True" or "False"')
|
30
|
+
parser.add_argument('--ignore_chr' , action='store_true', help='If not set, only chr1-chr22, chrX, chrY, chrM will be analyzed.')
|
28
31
|
|
29
|
-
args = parser.parse_args()
|
30
32
|
|
33
|
+
args = parser.parse_args()
|
31
34
|
|
32
35
|
if (args.genome == 'hg38') or (args.genome == 'mm10'):
|
33
|
-
dir_chrom_sizes = os.path.join(
|
36
|
+
dir_chrom_sizes = os.path.join(utility_dir, f'{args.genome}.chrom.sizes')
|
34
37
|
else:
|
35
38
|
dir_chrom_sizes = args.genome
|
36
39
|
|
@@ -42,7 +45,7 @@ if args.blacklist == 'same':
|
|
42
45
|
args.blacklist = args.genome
|
43
46
|
|
44
47
|
if (args.blacklist == 'hg38') or (args.blacklist == 'mm10'):
|
45
|
-
blacklist = os.path.join(
|
48
|
+
blacklist = os.path.join(utility_dir, f'offtracker_blacklist_{args.blacklist}.merged.bed')
|
46
49
|
else:
|
47
50
|
blacklist = args.blacklist
|
48
51
|
|
@@ -52,59 +55,40 @@ else:
|
|
52
55
|
if not os.path.exists(args.outdir):
|
53
56
|
os.makedirs(args.outdir)
|
54
57
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
prefix = gz_R2.str.extract('(.*)(?:.fq|.fastq)',expand=False)
|
65
|
-
|
66
|
-
nametype = None
|
67
|
-
for a_type in ['_trimmed_2', '_2_val_2','_R2_val_2','_R2','_2']:
|
68
|
-
len_type = len(a_type)
|
69
|
-
if prefix[0][-len_type:] == a_type:
|
70
|
-
nametype = a_type
|
71
|
-
sample_dir = prefix.str[:-len_type]
|
72
|
-
break
|
73
|
-
|
74
|
-
if nametype is None:
|
75
|
-
# pattern 搜索模式,可能会出 bug
|
76
|
-
# find "_R2." or "_2." in prefix[0]
|
77
|
-
pattern = re.compile(r'(_R2\.|_2\.)')
|
78
|
-
m = pattern.search(prefix[0])
|
79
|
-
if m:
|
80
|
-
nametype = prefix[0][m.span()[0]:]
|
81
|
-
len_type = len(nametype)
|
82
|
-
sample_dir = prefix.str[:-len_type]
|
83
|
-
|
84
|
-
assert nametype is not None, 'No fastq detected or the file name is invaild!'
|
85
|
-
|
86
|
-
sample_name = sample_dir.apply(os.path.basename)
|
58
|
+
if args.ignore_chr:
|
59
|
+
args.ignore_chr = '--ignore_chr'
|
60
|
+
else:
|
61
|
+
args.ignore_chr = ''
|
62
|
+
|
63
|
+
# 搜索 folder 的 n级子目录下的所有 fastq/fastq.gz/fq/fq.gz 文件
|
64
|
+
sample_names, files_R1, files_R2 = xseq.detect_fastq(args.folder, n_subfolder=args.subfolder, NGS_type=args.NGS_type)
|
65
|
+
|
66
|
+
assert not isinstance(sample_names, str), 'No fastq file is detected!'
|
87
67
|
|
88
68
|
dict_yaml = {
|
89
|
-
|
90
|
-
'
|
69
|
+
# fastq 信息
|
70
|
+
'files_R1':dict(zip(sample_names,files_R1)),
|
71
|
+
'files_R2':dict(zip(sample_names,files_R2)), # 单端 files_R2=[] 结果会自动为 {}
|
72
|
+
'NGS_type':args.NGS_type,
|
73
|
+
# 输入输出文件夹
|
91
74
|
'input_dir':args.folder,
|
92
75
|
'output_dir':args.outdir,
|
76
|
+
# 运行参数
|
93
77
|
'thread':args.thread,
|
94
78
|
'index':args.index,
|
95
79
|
'fasta':args.ref,
|
96
80
|
'binsize':args.binsize,
|
97
81
|
'blacklist':blacklist,
|
98
|
-
'nametype':nametype,
|
99
82
|
'genomelen':dir_chrom_sizes,
|
100
83
|
'normalize':args.normalize,
|
101
|
-
'
|
84
|
+
'utility_dir':utility_dir,
|
85
|
+
'ignore_chr':args.ignore_chr,
|
102
86
|
}
|
103
87
|
|
104
88
|
with open( os.path.join(args.outdir,'config.yaml'), 'w') as outfile:
|
105
89
|
yaml.dump(dict_yaml, outfile, default_flow_style=False)
|
106
90
|
|
107
|
-
snakefile = os.path.join(script_dir, '
|
91
|
+
snakefile = os.path.join(script_dir, 'snakefile/Snakefile_offtracker.smk')
|
108
92
|
shutil.copy(snakefile, os.path.join(args.outdir,'Snakefile'))
|
109
93
|
|
110
94
|
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
import offtracker.X_offplot as xoffplot
|
5
|
+
import pandas as pd
|
6
|
+
import argparse
|
7
|
+
import os
|
8
|
+
|
9
|
+
def main():
|
10
|
+
parser = argparse.ArgumentParser()
|
11
|
+
parser.description='Draw the plot of the off-targets with genomic sequences.\nIf .pdf file is too large, try to use .png file instead.'
|
12
|
+
parser.add_argument('--result' , type=str, required=True, help='The file of Offtracker_result_{outname}.csv' )
|
13
|
+
parser.add_argument('--sgrna' , type=str, required=True, help='Not including PAM' )
|
14
|
+
parser.add_argument('--pam' , type=str, default='NGG', help='PAM sequence. Default is "NGG".' )
|
15
|
+
parser.add_argument('--output' , type=str, default='same', help='The output file. Default is Offtracker_result_{outname}.pdf')
|
16
|
+
|
17
|
+
args = parser.parse_args()
|
18
|
+
if args.output == 'same':
|
19
|
+
dir_savefig = args.result.replace('.csv', '.pdf')
|
20
|
+
else:
|
21
|
+
dir_savefig = args.output
|
22
|
+
|
23
|
+
outname = os.path.basename(args.result).replace('Offtracker_result_', '').replace('.csv', '')
|
24
|
+
gRNA = args.sgrna
|
25
|
+
PAM = args.pam
|
26
|
+
full_seq = gRNA + PAM
|
27
|
+
|
28
|
+
df_result = pd.read_csv(args.result)
|
29
|
+
n_pos = len(df_result)
|
30
|
+
|
31
|
+
xoffplot.offtable(df_result, full_seq, length_pam = len(PAM), col_seq='target', threshold=2,
|
32
|
+
title=f'{outname} ({n_pos} sites)',
|
33
|
+
savefig=dir_savefig)
|
34
|
+
|
35
|
+
return f'The plot is saved as {dir_savefig}'
|
36
|
+
|
37
|
+
if __name__ == '__main__' :
|
38
|
+
result = main()
|
39
|
+
print(result)
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
THIS_VERSION = '0.4.1'
|
5
|
+
|
6
|
+
import argparse
|
7
|
+
import os, glob, yaml
|
8
|
+
import pandas as pd
|
9
|
+
import shutil, re
|
10
|
+
import offtracker
|
11
|
+
import offtracker.X_sequence as xseq
|
12
|
+
|
13
|
+
script_dir = os.path.abspath(os.path.dirname(offtracker.__file__))
|
14
|
+
utility_dir = os.path.join(script_dir, 'utility')
|
15
|
+
os.chmod( os.path.join(utility_dir, 'bedGraphToBigWig'), 0o755)
|
16
|
+
|
17
|
+
###
|
18
|
+
parser = argparse.ArgumentParser()
|
19
|
+
parser.description=f'xbulk_qc v{THIS_VERSION}. QC and trim fastq files.'
|
20
|
+
parser.add_argument('-f','--folder', type=str, required=True, help='Directory of the input folder' )
|
21
|
+
parser.add_argument('-o','--outdir', type=str, default='same', help='The output folder')
|
22
|
+
parser.add_argument('--subfolder' , type=int, default=0, help='subfolder level')
|
23
|
+
parser.add_argument('-t','--thread', type=int, default=8, help='Number of threads to be used')
|
24
|
+
parser.add_argument('--NGS_type' , type=str, default='paired-end', help='paired-end or single-end')
|
25
|
+
|
26
|
+
args = parser.parse_args()
|
27
|
+
|
28
|
+
# 自动化的参数调整和报错
|
29
|
+
if args.outdir == 'same':
|
30
|
+
args.outdir = os.path.join(args.folder,'Trimmed_data')
|
31
|
+
if not os.path.exists( args.outdir ):
|
32
|
+
os.makedirs( args.outdir )
|
33
|
+
else:
|
34
|
+
if not os.path.exists(args.outdir):
|
35
|
+
os.makedirs(args.outdir)
|
36
|
+
|
37
|
+
# 搜索 folder 的 n级子目录下的所有 fastq/fastq.gz/fq/fq.gz 文件
|
38
|
+
sample_names, files_R1, files_R2 = xseq.detect_fastq(args.folder, n_subfolder=args.subfolder, NGS_type=args.NGS_type)
|
39
|
+
|
40
|
+
assert not isinstance(sample_names, str), 'No fastq file is detected!'
|
41
|
+
|
42
|
+
dict_yaml = {
|
43
|
+
# fastq 信息
|
44
|
+
'files_R1':dict(zip(sample_names,files_R1)),
|
45
|
+
'files_R2':dict(zip(sample_names,files_R2)), # 单端 files_R2=[] 结果会自动为 {}
|
46
|
+
'NGS_type':args.NGS_type,
|
47
|
+
# 输入输出文件夹
|
48
|
+
'input_dir':args.folder,
|
49
|
+
'output_dir':args.outdir,
|
50
|
+
# 运行参数
|
51
|
+
'thread':args.thread,
|
52
|
+
'utility_dir':utility_dir
|
53
|
+
}
|
54
|
+
|
55
|
+
|
56
|
+
with open( os.path.join(args.outdir,'config.yaml'), 'w', encoding='utf-8') as outfile:
|
57
|
+
yaml.dump(dict_yaml, outfile, default_flow_style=False)
|
58
|
+
|
59
|
+
snakefile = os.path.join(script_dir, 'snakefile/Snakefile_QC.smk')
|
60
|
+
shutil.copy(snakefile, os.path.join(args.outdir,'Snakefile'))
|
61
|
+
|
62
|
+
|
@@ -11,7 +11,7 @@ from setuptools import find_packages, setup, Command
|
|
11
11
|
NAME = 'offtracker'
|
12
12
|
DESCRIPTION = 'Tracking-seq data analysis'
|
13
13
|
AUTHOR = 'Runda Xu'
|
14
|
-
EMAIL = '
|
14
|
+
EMAIL = 'xrd18@tsinghua.org.cn'
|
15
15
|
URL = 'https://github.com/Lan-lab/offtracker'
|
16
16
|
REQUIRES_PYTHON = '>=3.6.0'
|
17
17
|
|
@@ -47,9 +47,13 @@ setup(
|
|
47
47
|
author_email=EMAIL,
|
48
48
|
url=URL,
|
49
49
|
python_requires=REQUIRES_PYTHON,
|
50
|
-
packages=
|
51
|
-
package_data={'offtracker': ['
|
52
|
-
scripts = ['scripts/
|
50
|
+
packages=['offtracker'],
|
51
|
+
package_data={'offtracker': ['snakefile/*','utility/*']},
|
52
|
+
scripts = ['scripts/offtracker_qc.py',
|
53
|
+
'scripts/offtracker_config.py',
|
54
|
+
'scripts/offtracker_candidates.py',
|
55
|
+
'scripts/offtracker_analysis.py',
|
56
|
+
'scripts/offtracker_plot.py'],
|
53
57
|
install_requires=REQUIRED,
|
54
58
|
include_package_data=True
|
55
59
|
)
|
offtracker-2.7.8/PKG-INFO
DELETED
@@ -1,146 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.1
|
2
|
-
Name: offtracker
|
3
|
-
Version: 2.7.8
|
4
|
-
Summary: Tracking-seq data analysis
|
5
|
-
Home-page: https://github.com/Lan-lab/offtracker
|
6
|
-
Author: Runda Xu
|
7
|
-
Author-email: runda.xu@foxmail.com
|
8
|
-
Requires-Python: >=3.6.0
|
9
|
-
Description-Content-Type: text/markdown
|
10
|
-
License-File: LICENSE.txt
|
11
|
-
|
12
|
-
|
13
|
-
OFF-TRACKER
|
14
|
-
=======================
|
15
|
-
|
16
|
-
OFF-TRACKER is an end-to-end pipeline of Tracking-seq data analysis for detecting off-target sites of any genome editing tool that generates double-strand breaks (DSBs) or single-strand breaks (SSBs).
|
17
|
-
|
18
|
-
System requirements
|
19
|
-
-----
|
20
|
-
* Linux/Unix
|
21
|
-
* Python >= 3.6
|
22
|
-
|
23
|
-
Dependency
|
24
|
-
-----
|
25
|
-
|
26
|
-
```bash
|
27
|
-
# We recommend creating a new environment using mamba/conda to avoid compatibility problems
|
28
|
-
# If you don't use mamba, just replace the code with conda
|
29
|
-
mamba create -n offtracker -c bioconda blast snakemake pybedtools
|
30
|
-
```
|
31
|
-
|
32
|
-
|
33
|
-
Installation
|
34
|
-
-----
|
35
|
-
|
36
|
-
```bash
|
37
|
-
# activate the environment
|
38
|
-
conda activate offtracker
|
39
|
-
|
40
|
-
# Direct installation with pip
|
41
|
-
pip install offtracker
|
42
|
-
|
43
|
-
# (Alternative) Download the offtracker from github
|
44
|
-
git clone https://github.com/Lan-lab/offtracker.git
|
45
|
-
cd offtracker
|
46
|
-
pip install .
|
47
|
-
```
|
48
|
-
|
49
|
-
|
50
|
-
Before analyzing samples
|
51
|
-
-----
|
52
|
-
|
53
|
-
```bash
|
54
|
-
# Build blast index (only need once for each genome)
|
55
|
-
makeblastdb -input_type fasta -title hg38 -dbtype nucl -parse_seqids \
|
56
|
-
-in /Your_Path_To_Reference/hg38_genome.fa \
|
57
|
-
-out /Your_Path_To_Reference/hg38_genome.blastdb \
|
58
|
-
-logfile /Your_Path_To_Reference/hg38_genome.blastdb.log
|
59
|
-
|
60
|
-
# Build chromap index (only need once for each genome)
|
61
|
-
chromap -i -r /Your_Path_To_Reference/hg38_genome.fa \
|
62
|
-
-o /Your_Path_To_Reference/hg38_genome.chromap.index
|
63
|
-
|
64
|
-
# Generate candidate regions by sgRNA sequence (need once for each genome and sgRNA)
|
65
|
-
offtracker_candidates.py -t 8 -g hg38 \
|
66
|
-
-r /Your_Path_To_Reference/hg38_genome.fa \
|
67
|
-
-b /Your_Path_To_Reference/hg38_genome.blastdb \
|
68
|
-
--name 'HEK4' --sgrna 'GGCACTGCGGCTGGAGGTGG' --pam 'NGG' \
|
69
|
-
-o /Your_Path_To_Candidates
|
70
|
-
|
71
|
-
```
|
72
|
-
|
73
|
-
Strand-specific mapping of Tracking-seq data
|
74
|
-
-----
|
75
|
-
|
76
|
-
```bash
|
77
|
-
# Generate snakemake config file
|
78
|
-
offtracker_config.py -t 8 -g hg38 --blacklist hg38 \
|
79
|
-
-r /Your_Path_To_Reference/hg38_genome.fa \
|
80
|
-
-i /Your_Path_To_Reference/hg38_genome.chromap.index \
|
81
|
-
-f /Your_Path_To_Fastq \
|
82
|
-
-o /Your_Path_To_Output \
|
83
|
-
--subfolder 0
|
84
|
-
|
85
|
-
# --subfolder: If different samples are in separate folders, set this to 1
|
86
|
-
# -o: Default is outputting to /Your_Path_To_Fastq
|
87
|
-
|
88
|
-
# Run the snakemake program
|
89
|
-
cd /Your_Path_To_Fastq
|
90
|
-
snakemake -np # dry run
|
91
|
-
nohup snakemake --cores 16 1>snakemake.log 2>snakemake.err &
|
92
|
-
|
93
|
-
## about cores
|
94
|
-
# --cores of snakemake must be larger than -t of offtracker_config.py
|
95
|
-
# parallel number = cores/t
|
96
|
-
|
97
|
-
## about output
|
98
|
-
# This part will generate "*.fw.scaled.bw" and "*.rv.scaled.bw" for IGV visualization
|
99
|
-
# "*.fw.bed" and "*.rv.bed" are used in the next part.
|
100
|
-
```
|
101
|
-
|
102
|
-
|
103
|
-
Analyzing the off-target sites
|
104
|
-
-----
|
105
|
-
|
106
|
-
```bash
|
107
|
-
# In this part, multiple samples in the same condition can be analyzed in a single run by pattern recognition of sample names
|
108
|
-
|
109
|
-
offtracker_analysis.py -g hg38 --name "HEK4" \
|
110
|
-
--exp 'Cas9_HEK4.*293' \
|
111
|
-
--control 'control' \
|
112
|
-
--outname 'Cas9_HEK4_293' \
|
113
|
-
-f /Your_Path_To_Output \
|
114
|
-
--seqfolder /Your_Path_To_Candidates
|
115
|
-
|
116
|
-
# --name: the same as that in offtracker_candidates.py
|
117
|
-
# --exp/--control: add one or multiple patterns of file name in regex
|
118
|
-
|
119
|
-
|
120
|
-
# This step will generate Trackseq_result_{outname}.csv
|
121
|
-
# Intermediate files are saved in ./temp folder, which can be deleted
|
122
|
-
# Keeping the intermediate files can make the analysis faster if involving previously analyzed samples (e.g. using the same control samples for different analyses)
|
123
|
-
```
|
124
|
-
|
125
|
-
|
126
|
-
Note1
|
127
|
-
--------------
|
128
|
-
The default setting only includes chr1-chr22, chrX, chrY, and chrM.
|
129
|
-
|
130
|
-
Please make sure the reference genome contains "chr" at the beginning.
|
131
|
-
|
132
|
-
If you need support for other chromosomes or for species other than human/mouse, please post an issue.
|
133
|
-
|
134
|
-
Note2
|
135
|
-
--------------
|
136
|
-
Currently, this software is only ready-to-use for mm10 and hg38.
|
137
|
-
|
138
|
-
For any other genome, say hg19, please add genome size file named "hg19.chrom.sizes" to .\offtracker\mapping before install.
|
139
|
-
|
140
|
-
Besides, add "--blacklist none" or "--blacklist Your_Blacklist" when running offtracker_config.py
|
141
|
-
|
142
|
-
Note3
|
143
|
-
--------------
|
144
|
-
The FDR in the Tracking-seq result is not a rigorous estimate of the real off-target probability.
|
145
|
-
It is strongly recommended to observe the "fw.scaled.bw" and "rv.scaled.bw" using IGV to check each target location from the Tracking-seq result.
|
146
|
-
|
offtracker-2.7.8/README.md
DELETED
@@ -1,134 +0,0 @@
|
|
1
|
-
OFF-TRACKER
|
2
|
-
=======================
|
3
|
-
|
4
|
-
OFF-TRACKER is an end-to-end pipeline of Tracking-seq data analysis for detecting off-target sites of any genome editing tool that generates double-strand breaks (DSBs) or single-strand breaks (SSBs).
|
5
|
-
|
6
|
-
System requirements
|
7
|
-
-----
|
8
|
-
* Linux/Unix
|
9
|
-
* Python >= 3.6
|
10
|
-
|
11
|
-
Dependency
|
12
|
-
-----
|
13
|
-
|
14
|
-
```bash
|
15
|
-
# We recommend creating a new environment using mamba/conda to avoid compatibility problems
|
16
|
-
# If you don't use mamba, just replace the code with conda
|
17
|
-
mamba create -n offtracker -c bioconda blast snakemake pybedtools
|
18
|
-
```
|
19
|
-
|
20
|
-
|
21
|
-
Installation
|
22
|
-
-----
|
23
|
-
|
24
|
-
```bash
|
25
|
-
# activate the environment
|
26
|
-
conda activate offtracker
|
27
|
-
|
28
|
-
# Direct installation with pip
|
29
|
-
pip install offtracker
|
30
|
-
|
31
|
-
# (Alternative) Download the offtracker from github
|
32
|
-
git clone https://github.com/Lan-lab/offtracker.git
|
33
|
-
cd offtracker
|
34
|
-
pip install .
|
35
|
-
```
|
36
|
-
|
37
|
-
|
38
|
-
Before analyzing samples
|
39
|
-
-----
|
40
|
-
|
41
|
-
```bash
|
42
|
-
# Build blast index (only need once for each genome)
|
43
|
-
makeblastdb -input_type fasta -title hg38 -dbtype nucl -parse_seqids \
|
44
|
-
-in /Your_Path_To_Reference/hg38_genome.fa \
|
45
|
-
-out /Your_Path_To_Reference/hg38_genome.blastdb \
|
46
|
-
-logfile /Your_Path_To_Reference/hg38_genome.blastdb.log
|
47
|
-
|
48
|
-
# Build chromap index (only need once for each genome)
|
49
|
-
chromap -i -r /Your_Path_To_Reference/hg38_genome.fa \
|
50
|
-
-o /Your_Path_To_Reference/hg38_genome.chromap.index
|
51
|
-
|
52
|
-
# Generate candidate regions by sgRNA sequence (need once for each genome and sgRNA)
|
53
|
-
offtracker_candidates.py -t 8 -g hg38 \
|
54
|
-
-r /Your_Path_To_Reference/hg38_genome.fa \
|
55
|
-
-b /Your_Path_To_Reference/hg38_genome.blastdb \
|
56
|
-
--name 'HEK4' --sgrna 'GGCACTGCGGCTGGAGGTGG' --pam 'NGG' \
|
57
|
-
-o /Your_Path_To_Candidates
|
58
|
-
|
59
|
-
```
|
60
|
-
|
61
|
-
Strand-specific mapping of Tracking-seq data
|
62
|
-
-----
|
63
|
-
|
64
|
-
```bash
|
65
|
-
# Generate snakemake config file
|
66
|
-
offtracker_config.py -t 8 -g hg38 --blacklist hg38 \
|
67
|
-
-r /Your_Path_To_Reference/hg38_genome.fa \
|
68
|
-
-i /Your_Path_To_Reference/hg38_genome.chromap.index \
|
69
|
-
-f /Your_Path_To_Fastq \
|
70
|
-
-o /Your_Path_To_Output \
|
71
|
-
--subfolder 0
|
72
|
-
|
73
|
-
# --subfolder: If different samples are in separate folders, set this to 1
|
74
|
-
# -o: Default is outputting to /Your_Path_To_Fastq
|
75
|
-
|
76
|
-
# Run the snakemake program
|
77
|
-
cd /Your_Path_To_Fastq
|
78
|
-
snakemake -np # dry run
|
79
|
-
nohup snakemake --cores 16 1>snakemake.log 2>snakemake.err &
|
80
|
-
|
81
|
-
## about cores
|
82
|
-
# --cores of snakemake must be larger than -t of offtracker_config.py
|
83
|
-
# parallel number = cores/t
|
84
|
-
|
85
|
-
## about output
|
86
|
-
# This part will generate "*.fw.scaled.bw" and "*.rv.scaled.bw" for IGV visualization
|
87
|
-
# "*.fw.bed" and "*.rv.bed" are used in the next part.
|
88
|
-
```
|
89
|
-
|
90
|
-
|
91
|
-
Analyzing the off-target sites
|
92
|
-
-----
|
93
|
-
|
94
|
-
```bash
|
95
|
-
# In this part, multiple samples in the same condition can be analyzed in a single run by pattern recognition of sample names
|
96
|
-
|
97
|
-
offtracker_analysis.py -g hg38 --name "HEK4" \
|
98
|
-
--exp 'Cas9_HEK4.*293' \
|
99
|
-
--control 'control' \
|
100
|
-
--outname 'Cas9_HEK4_293' \
|
101
|
-
-f /Your_Path_To_Output \
|
102
|
-
--seqfolder /Your_Path_To_Candidates
|
103
|
-
|
104
|
-
# --name: the same as that in offtracker_candidates.py
|
105
|
-
# --exp/--control: add one or multiple patterns of file name in regex
|
106
|
-
|
107
|
-
|
108
|
-
# This step will generate Trackseq_result_{outname}.csv
|
109
|
-
# Intermediate files are saved in ./temp folder, which can be deleted
|
110
|
-
# Keeping the intermediate files can make the analysis faster if involving previously analyzed samples (e.g. using the same control samples for different analyses)
|
111
|
-
```
|
112
|
-
|
113
|
-
|
114
|
-
Note1
|
115
|
-
--------------
|
116
|
-
The default setting only includes chr1-chr22, chrX, chrY, and chrM.
|
117
|
-
|
118
|
-
Please make sure the reference genome contains "chr" at the beginning.
|
119
|
-
|
120
|
-
If you need support for other chromosomes or for species other than human/mouse, please post an issue.
|
121
|
-
|
122
|
-
Note2
|
123
|
-
--------------
|
124
|
-
Currently, this software is only ready-to-use for mm10 and hg38.
|
125
|
-
|
126
|
-
For any other genome, say hg19, please add genome size file named "hg19.chrom.sizes" to .\offtracker\mapping before install.
|
127
|
-
|
128
|
-
Besides, add "--blacklist none" or "--blacklist Your_Blacklist" when running offtracker_config.py
|
129
|
-
|
130
|
-
Note3
|
131
|
-
--------------
|
132
|
-
The FDR in the Tracking-seq result is not a rigorous estimate of the real off-target probability.
|
133
|
-
It is strongly recommended to observe the "fw.scaled.bw" and "rv.scaled.bw" using IGV to check each target location from the Tracking-seq result.
|
134
|
-
|
@@ -1,28 +0,0 @@
|
|
1
|
-
__version__ = "2.7.8"
|
2
|
-
# 2023.08.11. v1.1.0 adding an option for not normalizing the bw file
|
3
|
-
# 2023.10.26. v1.9.0 prerelease for v2.0
|
4
|
-
# 2023.10.27. v2.0.0 大更新,还没微调
|
5
|
-
# 2023.10.28. v2.1.0 修复bug,增加计算信号长度的功能
|
6
|
-
# 2023.10.28. v2.2.0 修复bug,改变计算信号长度的算法
|
7
|
-
# 2023.10.29. v2.3.0 增加 overall signal 计算
|
8
|
-
# 2023.11.01. v2.3.1 增加 signal_only 选项
|
9
|
-
# 2023.11.02. v2.3.2 修改 sample signal 和 group mean 的计算顺序
|
10
|
-
# 2023.11.04. v2.3.3 修复 overall score 标准化时排序错误的问题
|
11
|
-
# 2023.11.05. v2.3.4 修复判断单边溢出信号时的列名选取错误
|
12
|
-
# 2023.11.13. v2.3.5 微调 track score
|
13
|
-
# 2023.12.05. v2.3.6 candidates 增加 cleavage site,修正 alignment 有 deletion 会错位的 bug
|
14
|
-
# 2023.12.05. v2.3.7 用 cleavage site 代替 midpoint # 还没改完
|
15
|
-
# 2023.12.07. v2.3.8 df_score 增加 df_exp, df_ctr 各自列。修复没 df_ctr 时的 bug。track score 用 proximal
|
16
|
-
# 2023.12.09. v2.4.0 为了兼顾 proximal 和 overall,当 normalized overall signal 高于 2 时,增加 overall signal 的加分
|
17
|
-
# 2023.12.09. v2.5.0 尝试新的加权位置
|
18
|
-
# 2023.12.10. v2.6.0 加入 trackseq v4 的计算分支,即考虑 Region 内的 positive_pct,避免短而尖锐的信号
|
19
|
-
# 2023.12.10. v2.6.1 有些非特异信号数值很大,如果在 control 组是大负数,可能导致减 control 后假高信号,因此给负数一个 clip
|
20
|
-
# 2023.12.30. v2.7.0 增加 X_offplot 模块,用于绘图
|
21
|
-
# 2023.12.31. v2.7.1 control 的负数值 clip 由 -5 改为 -1,进一步减少假阳性。另外不加 overall 了
|
22
|
-
# 2024.01.01. v2.7.2 权重改为 proximal + pct = 1 + 1. 防信号外溢假阳性标准由<0改为<=0
|
23
|
-
# 2024.01.02. v2.7.3 flank regions 默认值改为 1000 2000 3000 5000。之前 control 的负数值 clip 相当于直接在 final score,现在改为每个单独 clip 后重新算 score,默认值为 CtrClip=-0.5
|
24
|
-
# 2024.01.03. v2.7.4 更新了 blacklist.bed
|
25
|
-
# 2024.01.04. v2.7.5 更新了 hg38 blacklist.bed
|
26
|
-
# 2024.01.12. v2.7.6 修复小bug,输出 fdr 改为 <0.05。
|
27
|
-
# 2024.01.23. v2.7.7 Snakefile_offtracker: add --fixedStep to bigwigCompare for not merging neighbouring bins with equal values.
|
28
|
-
# 2024.02.01. v2.7.8 逐步添加 X_offplot.py 功能
|