offtracker 2.7.7__zip → 2.7.10__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. offtracker-2.7.10/PKG-INFO +189 -0
  2. offtracker-2.7.10/README.md +177 -0
  3. offtracker-2.7.10/offtracker/X_offplot.py +539 -0
  4. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/X_offtracker.py +2 -1
  5. offtracker-2.7.10/offtracker/_version.py +30 -0
  6. offtracker-2.7.10/offtracker.egg-info/PKG-INFO +189 -0
  7. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker.egg-info/SOURCES.txt +2 -1
  8. {offtracker-2.7.7 → offtracker-2.7.10}/scripts/offtracker_analysis.py +18 -9
  9. offtracker-2.7.10/scripts/offtracker_plot.py +39 -0
  10. {offtracker-2.7.7 → offtracker-2.7.10}/setup.py +5 -2
  11. offtracker-2.7.7/PKG-INFO +0 -146
  12. offtracker-2.7.7/README.md +0 -134
  13. offtracker-2.7.7/offtracker/X_offplot.py +0 -123
  14. offtracker-2.7.7/offtracker/_version.py +0 -27
  15. offtracker-2.7.7/offtracker.egg-info/PKG-INFO +0 -146
  16. {offtracker-2.7.7 → offtracker-2.7.10}/LICENSE.txt +0 -0
  17. {offtracker-2.7.7 → offtracker-2.7.10}/MANIFEST.in +0 -0
  18. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/X_sequence.py +0 -0
  19. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/__init__.py +0 -0
  20. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/1.1_bed2fr_v4.5.py +0 -0
  21. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/1.3_bdg_normalize_v4.0.py +0 -0
  22. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/Snakefile_offtracker +0 -0
  23. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/bedGraphToBigWig +0 -0
  24. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/hg38.chrom.sizes +0 -0
  25. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/mm10.chrom.sizes +0 -0
  26. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/offtracker_blacklist_hg38.merged.bed +0 -0
  27. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker/mapping/offtracker_blacklist_mm10.merged.bed +0 -0
  28. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker.egg-info/dependency_links.txt +0 -0
  29. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker.egg-info/requires.txt +0 -0
  30. {offtracker-2.7.7 → offtracker-2.7.10}/offtracker.egg-info/top_level.txt +0 -0
  31. {offtracker-2.7.7 → offtracker-2.7.10}/scripts/offtracker_candidates.py +0 -0
  32. {offtracker-2.7.7 → offtracker-2.7.10}/scripts/offtracker_config.py +0 -0
  33. {offtracker-2.7.7 → offtracker-2.7.10}/setup.cfg +0 -0
@@ -1,123 +0,0 @@
1
- import matplotlib.pyplot as plt
2
- import matplotlib.patches as patches
3
- import pandas as pd
4
-
5
- def offtable(offtargets, target_guide,
6
- col_seq='best_target', col_score='track_score', col_mismatch='mismatch', col_loc='target_location',
7
- title=None, font='Arial', font_size=9,
8
- box_size_x=15, box_size_y=20, box_gap=1,
9
- x_offset=15, y_offset=35, dpi=100, savefig=None):
10
- # Facecolor
11
- color_dict = {
12
- 'A': 'lightgreen',
13
- 'T': 'lightblue',
14
- 'C': 'lightcoral',
15
- 'G': 'lightgoldenrodyellow',
16
- 'N': 'lightgrey',
17
- '—': 'orange',
18
- '-': 'orange'
19
- }
20
-
21
- # If offtargets is a DataFrame, convert to list of dictionaries
22
- if isinstance(offtargets, pd.DataFrame):
23
- offtargets = offtargets.to_dict(orient='records')
24
-
25
- # Configuration
26
- # title=None
27
- # font='Arial'
28
- # font_size = 9
29
- # box_size_x = 15 # 一个碱基图形的宽度
30
- # box_size_y = 20 # 一个碱基图形的高度
31
- # box_gap = 1 # 两行之间的间隔
32
- # x_offset = 15
33
- # y_offset = 35
34
- # dpi=100
35
- # col_seq='best_target'
36
- # col_score='track_score'
37
- # col_mismatch='mismatch'
38
- # col_loc='target_location'
39
- width = box_size_x * (len(target_guide) + 15)
40
- height = y_offset + (len(offtargets) + 2) * (box_size_y + box_gap)
41
- fig = plt.figure(figsize=(width / 100.0, height / 100.0), dpi=dpi)
42
- ax = fig.add_subplot(111)
43
-
44
- # Plot a title
45
- ax.text(x_offset, 25, "Off-targets table" if title is None else f"{title}", fontsize=14, family=font)
46
-
47
- # Plot the reference sequence
48
- for i, c in enumerate(target_guide):
49
- x = x_offset + i * box_size_x
50
- y = y_offset
51
- base_color = color_dict.get(c, 'purple') # Default to purple if base is not recognized
52
- ax.add_patch(patches.Rectangle((x, y), box_size_x, box_size_y, facecolor=base_color))
53
- ax.text(x + box_size_x / 2, y + box_size_y / 2, c, ha='center', va='center', family=font, fontsize=font_size)
54
- # add column annotations
55
- ax.text(x_offset + (len(target_guide) + 2) * box_size_x, y_offset + box_size_y / 4, 'Track\nScore', ha='center', va='center', family=font, fontsize=font_size*1.1)
56
- #ax.text(x_offset + (len(target_guide) + 7) * box_size_x, y_offset + box_size_y / 2, 'Mismatch', ha='center', va='center', family=font, fontsize=font_size*1.1)
57
- ax.text(x_offset + (len(target_guide) + 4) * box_size_x, y_offset + box_size_y / 2, 'Coordinates', ha='left', va='center', family=font, fontsize=font_size*1.1)
58
-
59
- # Plot aligned sequences
60
- # 目前有个bug:脱靶序列如果有 insertion,长度会不一致,而且也没想到画图怎么画,只能是默认删掉第一个碱基
61
- for j, seq in enumerate(offtargets):
62
- y = y_offset + (j + 1) * (box_size_y + box_gap)
63
- # 长度不一致的情况
64
- len_out = len(seq[col_seq]) - len(target_guide)
65
- if len_out > 0:
66
- if len_out > 1:
67
- print(f"Warning: {seq[col_seq]} is {len_out} longer than {target_guide}")
68
- # 通过比较删除开头的碱基和最后的碱基,看哪个更接近target_guide
69
- delete_first = seq[col_seq][len_out:]
70
- delete_last = seq[col_seq][:-len_out]
71
- # 计算两个序列和target_guide的hamming distance
72
- hamming_first = sum([1 for i, c in enumerate(delete_first) if c != target_guide[i]])
73
- hamming_last = sum([1 for i, c in enumerate(delete_last) if c != target_guide[i]])
74
- # 选择hamming distance小的那个序列
75
- if hamming_first < hamming_last:
76
- seq[col_seq] = delete_first
77
- else:
78
- seq[col_seq] = delete_last
79
- elif len_out < 0:
80
- print(f"Warning: {seq[col_seq]} is {-len_out} shorter than {target_guide}")
81
-
82
- for i, c in enumerate(seq[col_seq]):
83
- # gap 的 - (minus sign) 太短了,所以替换成 — (em dash)
84
- if c == '-':
85
- c = '—'
86
- x = x_offset + i * box_size_x
87
- base_color = color_dict.get(c, 'purple') # Default to purple if base is not recognized
88
- if c == target_guide[i]:
89
- ax.add_patch(patches.Rectangle((x, y), box_size_x, box_size_y, facecolor='white')) # same
90
- elif target_guide[i] == 'N':
91
- ax.add_patch(patches.Rectangle((x, y), box_size_x, box_size_y, facecolor='white')) # N in target
92
- else:
93
- ax.add_patch(patches.Rectangle((x, y), box_size_x, box_size_y, facecolor=base_color))
94
- ax.text(x + box_size_x / 2, y + box_size_y / 2, "." if c == target_guide[i] else c, ha='center', va='center', family=font, fontsize=font_size, weight='bold')
95
-
96
- # Annotations for score, mismatches, and location coordinates
97
- ax.text(x_offset + (len(target_guide) + 2) * box_size_x, y + box_size_y / 2, round(seq[col_score],2), ha='center', va='center', family=font, fontsize=font_size)
98
- #ax.text(x_offset + (len(target_guide) + 7) * box_size_x, y + box_size_y / 2, "Target" if seq[col_mismatch] == 0 else seq[col_mismatch], ha='center', va='center', family=font, fontsize=font_size, color='red' if seq[col_mismatch] == 0 else 'black')
99
- ax.text(x_offset + (len(target_guide) + 4) * box_size_x, y + box_size_y / 2, seq[col_loc], ha='left', va='center', family=font, fontsize=font_size)
100
-
101
- # add a vertical line to indicate the PAM
102
- x_line = x_offset + (len(target_guide) - 3) * box_size_x
103
- y_start = y_offset # + box_size_y / 2
104
- y_end = y_start + (len(offtargets)+1) * (box_size_y + box_gap)
105
- ax.vlines(x=x_line, ymin=y_start, ymax=y_end, color='indianred', linestyle='--')
106
-
107
- # Styling and save
108
- ax.set_xlim(0, width*1.1) # location 的文字太长了,所以要加长一点
109
- ax.set_ylim(height, 0)
110
- ax.axis('off')
111
-
112
- # # This will make the subplot(s) expand to fill the entire figure area, with no padding on any side.
113
- # # In brief, make the plot bigger (not influence the font size)
114
- plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
115
- if savefig is not None:
116
- plt.savefig(savefig, dpi=dpi)
117
- plt.show()
118
- return ax
119
-
120
-
121
-
122
-
123
-
@@ -1,27 +0,0 @@
1
- __version__ = "2.7.7"
2
- # 2023.08.11. v1.1.0 adding a option for not normalizing the bw file
3
- # 2023.10.26. v1.9.0 prerelease for v2.0
4
- # 2023.10.27. v2.0.0 大更新,还没微调
5
- # 2023.10.28. v2.1.0 修复bug,增加计算信号长度的功能
6
- # 2023.10.28. v2.2.0 修复bug,改变计算信号长度的算法
7
- # 2023.10.29. v2.3.0 增加 overall signal 计算
8
- # 2023.11.01. v2.3.1 增加 signal_only 选项
9
- # 2023.11.02. v2.3.2 修改 sample signal 和 group mean 的计算顺序
10
- # 2023.11.04. v2.3.3 修复 overall score 标准化时排序错误的问题
11
- # 2023.11.05. v2.3.4 修复判断单边溢出信号时的列名选取错误
12
- # 2023.11.13. v2.3.5 微调 track score
13
- # 2023.12.05. v2.3.6 candidates 增加 cleavage site,修正 alignment 有 deletion 会错位的 bug
14
- # 2023.12.05. v2.3.7 用 cleavage site 代替 midpoint # 还没改完
15
- # 2023.12.07. v2.3.8 df_score 增加 df_exp, df_ctr 各自列。修复没 df_ctr 时的 bug。track score 用 proximal
16
- # 2023.12.09. v2.4.0 为了兼顾 proximal 和 overall,当 normalized overall signal 高于 2 时,增加 overall signal 的加分
17
- # 2023.12.09. v2.5.0 尝试新的加权位置
18
- # 2023.12.10. v2.6.0 加入 trackseq v4 的计算分支,即考虑 Region 内的 positive_pct,避免短而尖锐的信号
19
- # 2023.12.10. v2.6.1 有些非特异信号数值很大,如果在 control 组是大负数,可能导致减 control 后假高信号,因此给负数一个 clip
20
- # 2023.12.30. v2.7.0 增加 X_offplot 模块,用于绘图
21
- # 2023.12.31. v2.7.1 control 的负数值 clip 由 -5 改为 -1,进一步减少假阳性。另外不加 overall 了
22
- # 2024.01.01. v2.7.2 权重改为 proximal + pct = 1 + 1. 防信号外溢假阳性标准由<0改为<=0
23
- # 2024.01.02. v2.7.3 flank regions 默认值改为 1000 2000 3000 5000。之前 control 的负数值 clip 相当于直接在 final score,现在改为每个单独 clip 后重新算 score,默认值为 CtrClip=-0.5
24
- # 2024.01.03. v2.7.4 更新了 blacklist.bed
25
- # 2024.01.04. v2.7.5 更新了 hg38 blacklist.bed
26
- # 2024.01.12. v2.7.6 修复小bug,输出 fdr 改为 <0.05。
27
- # 2024.01.23. v2.7.7 Snakefile_offtracker: add --fixedStep to bigwigCompare for not merging neighbouring bins with equal values.
@@ -1,146 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: offtracker
3
- Version: 2.7.7
4
- Summary: Track-seq data analysis
5
- Home-page: https://github.com/Lan-lab/offtracker
6
- Author: Runda Xu
7
- Author-email: runda.xu@foxmail.com
8
- Requires-Python: >=3.6.0
9
- Description-Content-Type: text/markdown
10
- License-File: LICENSE.txt
11
-
12
-
13
- OFF-TRACKER
14
- =======================
15
-
16
- OFF-TRACKER is an end-to-end pipeline for Track-seq data analysis that detects off-target sites of any genome editing tool that generates double-strand breaks (DSBs) or single-strand breaks (SSBs).
17
-
18
- System requirements
19
- -----
20
- * Linux/Unix
21
- * Python >= 3.6
22
-
23
- Dependency
24
- -----
25
-
26
- ```bash
27
- # We recommend creating a new environment using mamba/conda to avoid compatibility problems
28
- # If you don't use mamba, just replace "mamba" with "conda" in the command below
29
- mamba create -n offtracker -c bioconda blast snakemake pybedtools
30
- ```
31
-
32
-
33
- Installation
34
- -----
35
-
36
- ```bash
37
- # activate the environment
38
- conda activate offtracker
39
-
40
- # Direct installation with pip
41
- pip install offtracker
42
-
43
- # (Alternative) Download the offtracker from github
44
- git clone https://github.com/Lan-lab/offtracker.git
45
- cd offtracker
46
- pip install .
47
- ```
48
-
49
-
50
- Before analyzing samples
51
- -----
52
-
53
- ```bash
54
- # Build blast index (only need once for each genome)
55
- makeblastdb -input_type fasta -title hg38 -dbtype nucl -parse_seqids \
56
- -in /Your_Path_To_Reference/hg38_genome.fa \
57
- -out /Your_Path_To_Reference/hg38_genome.blastdb \
58
- -logfile /Your_Path_To_Reference/hg38_genome.blastdb.log
59
-
60
- # Build chromap index (only need once for each genome)
61
- chromap -i -r /Your_Path_To_Reference/hg38_genome.fa \
62
- -o /Your_Path_To_Reference/hg38_genome.chromap.index
63
-
64
- # Generate candidate regions by sgRNA sequence (need once for each genome and sgRNA)
65
- offtracker_candidates.py -t 8 -g hg38 \
66
- -r /Your_Path_To_Reference/hg38_genome.fa \
67
- -b /Your_Path_To_Reference/hg38_genome.blastdb \
68
- --name 'HEK4' --sgrna 'GGCACTGCGGCTGGAGGTGG' --pam 'NGG' \
69
- -o /Your_Path_To_Candidates
70
-
71
- ```
72
-
73
- Strand-specific mapping of Track-seq data
74
- -----
75
-
76
- ```bash
77
- # Generate snakemake config file
78
- offtracker_config.py -t 8 -g hg38 --blacklist hg38 \
79
- -r /Your_Path_To_Reference/hg38_genome.fa \
80
- -i /Your_Path_To_Reference/hg38_genome.chromap.index \
81
- -f /Your_Path_To_Fastq \
82
- -o /Your_Path_To_Output \
83
- --subfolder 0
84
-
85
- # --subfolder: If different samples are in separate folders, set this to 1
86
- # -o: Default is outputting to /Your_Path_To_Fastq
87
-
88
- # Run the snakemake program
89
- cd /Your_Path_To_Fastq
90
- snakemake -np # dry run
91
- nohup snakemake --cores 16 1>snakemake.log 2>snakemake.err &
92
-
93
- ## about cores
94
- # --cores of snakemake must be larger than -t of offtracker_config.py
95
- # parallel number = cores/t
96
-
97
- ## about output
98
- # This part will generate "*.fw.scaled.bw" and "*.rv.scaled.bw" for IGV visualization
99
- # "*.fw.bed" and "*.rv.bed" are used in the next part.
100
- ```
101
-
102
-
103
- Analyzing the off-target sites
104
- -----
105
-
106
- ```bash
107
- # In this part, multiple samples in the same condition can be analyzed in a single run by pattern matching on sample names
108
-
109
- offtracker_analysis.py -g hg38 --name "HEK4" \
110
- --exp 'Cas9_HEK4.*293' \
111
- --control 'control' \
112
- --outname 'Cas9_HEK4_293' \
113
- -f /Your_Path_To_Output \
114
- --seqfolder /Your_Path_To_Candidates
115
-
116
- # --name: the same as that in offtracker_candidates.py
117
- # --exp/--control: add one or multiple patterns of file name in regex
118
-
119
-
120
- # This step will generate Trackseq_result_{outname}.csv
121
- # Intermediate files are saved in ./temp folder, which can be deleted
122
- # Keeping the intermediate files can make the analysis faster if involving previously analyzed samples (e.g. using the same control samples for different analyses)
123
- ```
124
-
125
-
126
- Note1
127
- --------------
128
- The default setting only includes chr1-chr22, chrX, chrY, and chrM.
129
-
130
- Please make sure the reference genome contains "chr" at the beginning.
131
-
132
- If you need support for other chromosomes or for species other than human/mouse, please post an issue.
133
-
134
- Note2
135
- --------------
136
- Currently, this software is only ready-to-use for mm10 and hg38.
137
-
138
- For any other genome, say hg19, please add a genome size file named "hg19.chrom.sizes" to .\offtracker\mapping before installing.
139
-
140
- Besides, add "--blacklist none" or "--blacklist Your_Blacklist" when running offtracker_config.py
141
-
142
- Note3
143
- --------------
144
- The FDR in the Track-seq result is not a rigorous estimate of the real off-target probability.
145
- It is strongly recommended to inspect the "fw.scaled.bw" and "rv.scaled.bw" tracks in IGV to verify each target location reported in the Track-seq result.
146
-
File without changes
File without changes
File without changes