RiboParser 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- riboparser-0.1.3/PKG-INFO +1316 -0
- riboparser-0.1.3/README.md +1280 -0
- riboparser-0.1.3/RiboParser.egg-info/PKG-INFO +1316 -0
- riboparser-0.1.3/RiboParser.egg-info/SOURCES.txt +130 -0
- riboparser-0.1.3/RiboParser.egg-info/dependency_links.txt +1 -0
- riboparser-0.1.3/RiboParser.egg-info/entry_points.txt +73 -0
- riboparser-0.1.3/RiboParser.egg-info/requires.txt +17 -0
- riboparser-0.1.3/RiboParser.egg-info/top_level.txt +2 -0
- riboparser-0.1.3/scripts/__init__.py +8 -0
- riboparser-0.1.3/scripts/bam/__init__.py +0 -0
- riboparser-0.1.3/scripts/bam/flt_bam_threads.py +73 -0
- riboparser-0.1.3/scripts/bam/test_flt.py +54 -0
- riboparser-0.1.3/scripts/bedgraph/__init__.py +0 -0
- riboparser-0.1.3/scripts/bedgraph/bg2meta.py +90 -0
- riboparser-0.1.3/scripts/bedgraph/rpm_smooth.py +270 -0
- riboparser-0.1.3/scripts/bedgraph/site2base.py +47 -0
- riboparser-0.1.3/scripts/bowtie/__init__.py +0 -0
- riboparser-0.1.3/scripts/bowtie/merge_bwt_log.py +138 -0
- riboparser-0.1.3/scripts/fasta/__init__.py +0 -0
- riboparser-0.1.3/scripts/fasta/fa_gc_sum.py +99 -0
- riboparser-0.1.3/scripts/fasta/fa_len_flt.py +90 -0
- riboparser-0.1.3/scripts/fasta/fa_len_sum.py +114 -0
- riboparser-0.1.3/scripts/fasta/fa_split.py +110 -0
- riboparser-0.1.3/scripts/fasta/line_feed.py +75 -0
- riboparser-0.1.3/scripts/fasta/nt2aa.py +152 -0
- riboparser-0.1.3/scripts/fasta/rand_seq.py +105 -0
- riboparser-0.1.3/scripts/fasta/retrieve_seq.py +90 -0
- riboparser-0.1.3/scripts/fasta/revs.py +76 -0
- riboparser-0.1.3/scripts/fastq/__init__.py +0 -0
- riboparser-0.1.3/scripts/fastq/fq2fa.py +86 -0
- riboparser-0.1.3/scripts/fastq/fq2txt.py +72 -0
- riboparser-0.1.3/scripts/fastq/fq_cutting.py +88 -0
- riboparser-0.1.3/scripts/fastq/fq_len_flt.py +84 -0
- riboparser-0.1.3/scripts/fastq/fq_len_sum.py +84 -0
- riboparser-0.1.3/scripts/fastq/fq_split.py +94 -0
- riboparser-0.1.3/scripts/fastq/fq_trim.py +86 -0
- riboparser-0.1.3/scripts/fastq/phred_quality.py +96 -0
- riboparser-0.1.3/scripts/merge_ribo/__init__.py +0 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_cdt.py +105 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_coverage.py +91 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_cst.py +103 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_digestion.py +121 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_dst_list.py +108 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_length.py +112 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_metagene.py +93 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_occupancy.py +104 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_odd_ratio.py +93 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_offset.py +117 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_offset_detail.py +162 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_offset_end.py +152 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_pausing.py +109 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_period.py +92 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_quant.py +91 -0
- riboparser-0.1.3/scripts/merge_ribo/merge_saturation.py +100 -0
- riboparser-0.1.3/scripts/oligo/__init__.py +0 -0
- riboparser-0.1.3/scripts/oligo/get_overlap_seq.py +85 -0
- riboparser-0.1.3/scripts/oligo/get_tissue_freq.py +133 -0
- riboparser-0.1.3/scripts/oligo/get_win_seq.py +54 -0
- riboparser-0.1.3/scripts/ribocode/__init__.py +0 -0
- riboparser-0.1.3/scripts/ribocode/ribocode_bed_format.py +254 -0
- riboparser-0.1.3/scripts/ribotish/__init__.py +0 -0
- riboparser-0.1.3/scripts/ribotish/ribotish_format.py +327 -0
- riboparser-0.1.3/scripts/rsem/__init__.py +0 -0
- riboparser-0.1.3/scripts/rsem/merge_rsem.py +76 -0
- riboparser-0.1.3/scripts/unix/__init__.py +0 -0
- riboparser-0.1.3/scripts/unix/dos2unix.py +54 -0
- riboparser-0.1.3/setup.cfg +4 -0
- riboparser-0.1.3/setup.py +141 -0
- riboparser-0.1.3/utils/__init__.py +8 -0
- riboparser-0.1.3/utils/make_ensb_ref.py +308 -0
- riboparser-0.1.3/utils/make_ribo_ref.py +39 -0
- riboparser-0.1.3/utils/ribo/ArgsParser.py +1294 -0
- riboparser-0.1.3/utils/ribo/Bam2Wig.py +292 -0
- riboparser-0.1.3/utils/ribo/BamFilter.py +90 -0
- riboparser-0.1.3/utils/ribo/CDT.py +298 -0
- riboparser-0.1.3/utils/ribo/CST.py +662 -0
- riboparser-0.1.3/utils/ribo/Codon.py +206 -0
- riboparser-0.1.3/utils/ribo/Coefficient_of_Variation.py +340 -0
- riboparser-0.1.3/utils/ribo/Coverage.py +394 -0
- riboparser-0.1.3/utils/ribo/Cumulative_CoV.py +252 -0
- riboparser-0.1.3/utils/ribo/Density.py +226 -0
- riboparser-0.1.3/utils/ribo/Digestion.py +296 -0
- riboparser-0.1.3/utils/ribo/Ensembl_Ref.py +272 -0
- riboparser-0.1.3/utils/ribo/GenePred.py +504 -0
- riboparser-0.1.3/utils/ribo/MetaCodon.py +462 -0
- riboparser-0.1.3/utils/ribo/Metaplot.py +275 -0
- riboparser-0.1.3/utils/ribo/Occupancy.py +293 -0
- riboparser-0.1.3/utils/ribo/Odd_Ratio.py +538 -0
- riboparser-0.1.3/utils/ribo/Offset.py +626 -0
- riboparser-0.1.3/utils/ribo/Offset_RSBM.py +647 -0
- riboparser-0.1.3/utils/ribo/Pausing.py +451 -0
- riboparser-0.1.3/utils/ribo/Periodicity.py +169 -0
- riboparser-0.1.3/utils/ribo/Quality.py +668 -0
- riboparser-0.1.3/utils/ribo/Quant.py +385 -0
- riboparser-0.1.3/utils/ribo/RNA.py +383 -0
- riboparser-0.1.3/utils/ribo/RPFs.py +314 -0
- riboparser-0.1.3/utils/ribo/Retrieve.py +146 -0
- riboparser-0.1.3/utils/ribo/Ribo.py +482 -0
- riboparser-0.1.3/utils/ribo/Shuffle.py +148 -0
- riboparser-0.1.3/utils/ribo/__init__.py +6 -0
- riboparser-0.1.3/utils/ribo_parser.py +88 -0
- riboparser-0.1.3/utils/rna_Density.py +40 -0
- riboparser-0.1.3/utils/rna_Offset.py +48 -0
- riboparser-0.1.3/utils/rpf_Bam2bw.py +40 -0
- riboparser-0.1.3/utils/rpf_Bam_Filter.py +27 -0
- riboparser-0.1.3/utils/rpf_CDT.py +40 -0
- riboparser-0.1.3/utils/rpf_CST.py +42 -0
- riboparser-0.1.3/utils/rpf_Check.py +51 -0
- riboparser-0.1.3/utils/rpf_CoV.py +43 -0
- riboparser-0.1.3/utils/rpf_Corr.py +144 -0
- riboparser-0.1.3/utils/rpf_Coverage.py +42 -0
- riboparser-0.1.3/utils/rpf_Cumulative_CoV.py +39 -0
- riboparser-0.1.3/utils/rpf_Density.py +39 -0
- riboparser-0.1.3/utils/rpf_Digest.py +39 -0
- riboparser-0.1.3/utils/rpf_Geneplot.py +174 -0
- riboparser-0.1.3/utils/rpf_Merge.py +66 -0
- riboparser-0.1.3/utils/rpf_Meta_Codon.py +44 -0
- riboparser-0.1.3/utils/rpf_Metaplot.py +34 -0
- riboparser-0.1.3/utils/rpf_Occupancy.py +35 -0
- riboparser-0.1.3/utils/rpf_Odd_Ratio.py +41 -0
- riboparser-0.1.3/utils/rpf_Offset.py +54 -0
- riboparser-0.1.3/utils/rpf_Offset_RSBM.py +40 -0
- riboparser-0.1.3/utils/rpf_Pausing.py +47 -0
- riboparser-0.1.3/utils/rpf_Periodicity.py +35 -0
- riboparser-0.1.3/utils/rpf_Quant.py +44 -0
- riboparser-0.1.3/utils/rpf_Reference.py +39 -0
- riboparser-0.1.3/utils/rpf_Retrieve.py +36 -0
- riboparser-0.1.3/utils/rpf_Shuffle.py +31 -0
- riboparser-0.1.3/utils/rpf_end.py +210 -0
- riboparser-0.1.3/utils/serp_overlap.py +126 -0
- riboparser-0.1.3/utils/serp_peak.py +39 -0
- riboparser-0.1.3/utils/serp_properties.py +31 -0
|
@@ -0,0 +1,1316 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: RiboParser
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: A pipeline for ribosome profiling data analysis
|
|
5
|
+
Home-page: https://github.com/renscq/RiboParser
|
|
6
|
+
Author: Ren Shuchao
|
|
7
|
+
Author-email: rensc0718@163.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.12
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: numpy~=1.26.4
|
|
14
|
+
Requires-Dist: pandas~=2.2.2
|
|
15
|
+
Requires-Dist: pyarrow~=16.1.0
|
|
16
|
+
Requires-Dist: polars~=0.20.31
|
|
17
|
+
Requires-Dist: biopython~=1.78
|
|
18
|
+
Requires-Dist: scipy~=1.12.0
|
|
19
|
+
Requires-Dist: scikit-learn~=1.4.2
|
|
20
|
+
Requires-Dist: statsmodels~=0.14.2
|
|
21
|
+
Requires-Dist: pysam~=0.22.1
|
|
22
|
+
Requires-Dist: joblib~=1.4.2
|
|
23
|
+
Requires-Dist: interval~=1.0.0
|
|
24
|
+
Requires-Dist: matplotlib~=3.8.4
|
|
25
|
+
Requires-Dist: matplotlib-venn~=1.1.1
|
|
26
|
+
Requires-Dist: seaborn~=0.13.2
|
|
27
|
+
Requires-Dist: plotly~=5.22.0
|
|
28
|
+
Requires-Dist: seqlogo~=5.29.9
|
|
29
|
+
Requires-Dist: kaleido~=0.2.1
|
|
30
|
+
|
|
31
|
+
<!--
|
|
32
|
+
* @Author: 'rensc' 'rensc0718@163.com'
|
|
33
|
+
* @Date: 2024-10-15 11:44:58
|
|
34
|
+
* @LastEditors: 'rensc' 'rensc0718@163.com'
|
|
35
|
+
* @LastEditTime: 2024-10-20 07:06:36
|
|
36
|
+
* @FilePath: \RiboParser\README.md
|
|
37
|
+
*
|
|
38
|
+
-->
|
|
39
|
+
|
|
40
|
+
# RiboParser
|
|
41
|
+
|
|
42
|
+
为了便于理解和使用,这里对公开的项目数据进行分析,并拆解每一个分析步骤,来展示完整的工作流程。
|
|
43
|
+
这个过程包括了通用的分析步骤,以及定制的 `RiboParser` 和 `RiboShiny` 的分析和可视化步骤。
|
|
44
|
+
|
|
45
|
+
1. 软件的安装
|
|
46
|
+
2. 参考文件的创建
|
|
47
|
+
3. 原始数据的下载
|
|
48
|
+
4. 原始数据清洗
|
|
49
|
+
5. 数据比对
|
|
50
|
+
6. 测序质量分析
|
|
51
|
+
7. 基因水平分析
|
|
52
|
+
8. 密码子水平分析
|
|
53
|
+
|
|
54
|
+
以上的数据分析输出的结果可以在 `RiboShiny` 中进行下游的分析和可视化。
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
## 1. 软件的安装
|
|
58
|
+
|
|
59
|
+
### 1. conda 创建环境
|
|
60
|
+
```bash
|
|
61
|
+
conda create -n ribo
|
|
62
|
+
conda activate ribo
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### 2. conda 安装软件依赖
|
|
66
|
+
```bash
|
|
67
|
+
conda install cutadapt
|
|
68
|
+
conda install bowtie
|
|
69
|
+
conda install samtools
|
|
70
|
+
conda install star
|
|
71
|
+
conda install bedtools
|
|
72
|
+
conda install subread
|
|
73
|
+
conda install rsem
|
|
74
|
+
conda install pigz
|
|
75
|
+
conda install gffread
|
|
76
|
+
conda install sra-tools
|
|
77
|
+
conda install ucsc-genepredtogtf
|
|
78
|
+
conda install ucsc-gtftogenepred
|
|
79
|
+
conda install ucsc-gff3togenepred
|
|
80
|
+
conda install ucsc-bedgraphtobigwig
|
|
81
|
+
conda install ucsc-bedsort
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### 3. conda 安装 RiboParser
|
|
85
|
+
```bash
|
|
86
|
+
conda install riboparser -c rensc
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### 4. 测试安装状态:
|
|
90
|
+
测试软件的依赖、安装和运行问题。
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
rpf_test
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## 2. 准备参考文件
|
|
97
|
+
|
|
98
|
+
### 1. 完整项目目录示例如下:
|
|
99
|
+
|
|
100
|
+
完整的数据分析包含了参考文件的准备、RNA-seq 的数据分析、Ribo-seq 的数据分析。
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
$ cd /mnt/t64/test/sce/
|
|
104
|
+
$ tree
|
|
105
|
+
|
|
106
|
+
.
|
|
107
|
+
├── 1.reference
|
|
108
|
+
│ ├── cdna
|
|
109
|
+
│ ├── genome
|
|
110
|
+
│ ├── gtf
|
|
111
|
+
│ ├── mrna
|
|
112
|
+
│ ├── norm
|
|
113
|
+
│ ├── ncrna
|
|
114
|
+
│ ├── rrna
|
|
115
|
+
│ ├── rsem-index
|
|
116
|
+
│ ├── star-index
|
|
117
|
+
│ └── trna
|
|
118
|
+
├── 2.rawdata
|
|
119
|
+
│ ├── rna-seq
|
|
120
|
+
│ └── ribo-seq
|
|
121
|
+
├── 3.rna-seq
|
|
122
|
+
│ ├── 1.cleandata
|
|
123
|
+
│ ├── 2.bowtie
|
|
124
|
+
│ ├── 3.star
|
|
125
|
+
│ ├── 4.quantification
|
|
126
|
+
│ └── 5.riboparser
|
|
127
|
+
│ ├── 01.qc
|
|
128
|
+
│ ├── 03.offset
|
|
129
|
+
│ ├── 04.density
|
|
130
|
+
│ ├── 05.merge
|
|
131
|
+
│ ├── 06.periodicity
|
|
132
|
+
│ ├── 07.metaplot
|
|
133
|
+
│ ├── 08.coverage
|
|
134
|
+
│ ├── 09.correlation
|
|
135
|
+
│ ├── 10.shuffle
|
|
136
|
+
│ └── 11.gene_density
|
|
137
|
+
├── 4.ribo-seq
|
|
138
|
+
│ ├── 1.cleandata
|
|
139
|
+
│ ├── 2.bowtie
|
|
140
|
+
│ ├── 3.star
|
|
141
|
+
│ ├── 4.quantification
|
|
142
|
+
│ └── 5.riboparser
|
|
143
|
+
│ ├── 01.qc
|
|
144
|
+
│ ├── 02.digestion
|
|
145
|
+
│ ├── 03.offset
|
|
146
|
+
│ ├── 04.density
|
|
147
|
+
│ ├── 05.merge
|
|
148
|
+
│ ├── 06.periodicity
|
|
149
|
+
│ ├── 07.metaplot
|
|
150
|
+
│ ├── 08.coverage
|
|
151
|
+
│ ├── 09.correlation
|
|
152
|
+
│ ├── 10.quantification
|
|
153
|
+
│ ├── 11.pausing_score
|
|
154
|
+
│ ├── 12.codon_occupancy
|
|
155
|
+
│ ├── 13.codon_decoding_time
|
|
156
|
+
│ ├── 14.codon_selection_time
|
|
157
|
+
│ ├── 15.coefficient_of_variation
|
|
158
|
+
│ ├── 16.meta_codon
|
|
159
|
+
│ ├── 17.shuffle
|
|
160
|
+
│ └── 18.gene_density
|
|
161
|
+
└── 5.test
|
|
162
|
+
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### 2. 准备参考基因组索引
|
|
166
|
+
|
|
167
|
+
#### 2.1. 创建目录
|
|
168
|
+
|
|
169
|
+
创建文件夹用于放置不同类型的参考序列文件。
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
$ cd /mnt/t64/test/sce/1.reference/
|
|
173
|
+
|
|
174
|
+
$ mkdir cdna genome gtf mrna ncrna rrna trna norm rsem-index
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
#### 2.2 从 NCBI 下载参考文件
|
|
178
|
+
|
|
179
|
+
使用最常用的数据分析文件格式,基因组序列为 fasta 格式,参考文件为 GTF 或者 GFF3 格式。
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
# genome sequence
|
|
183
|
+
$ wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/146/045/GCF_000146045.2_R64/GCF_000146045.2_R64_genomic.fna.gz
|
|
184
|
+
|
|
185
|
+
# GTF or GFF3
|
|
186
|
+
$ wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/146/045/GCF_000146045.2_R64/GCF_000146045.2_R64_genomic.gtf.gz
|
|
187
|
+
$ wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/146/045/GCF_000146045.2_R64/GCF_000146045.2_R64_genomic.gff.gz
|
|
188
|
+
|
|
189
|
+
# cDNA sequence
|
|
190
|
+
$ wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/146/045/GCF_000146045.2_R64/GCF_000146045.2_R64_rna.fna.gz
|
|
191
|
+
|
|
192
|
+
# feature table
|
|
193
|
+
$ wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/146/045/GCF_000146045.2_R64/GCF_000146045.2_R64_feature_table.txt.gz
|
|
194
|
+
|
|
195
|
+
# decompression
|
|
196
|
+
$ gunzip *.gz
|
|
197
|
+
|
|
198
|
+
$ gffread -g GCF_000146045.2_R64_genomic.fna GCF_000146045.2_R64_genomic.gff -F -w cdna.fa
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
#### 2.3 使用 bowtie 创建 genome 索引
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
$ cd /mnt/t64/test/sce/1.reference/genome
|
|
205
|
+
|
|
206
|
+
$ bowtie-build ../GCF_000146045.2_R64_genomic.fna genome
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
#### 2.4 使用 bowtie 创建 mRNA 索引
|
|
210
|
+
|
|
211
|
+
```bash
|
|
212
|
+
$ cd /mnt/t64/test/sce/1.reference/mrna
|
|
213
|
+
|
|
214
|
+
# filter the mrna sequence
|
|
215
|
+
$ grep -i 'gbkey=mRNA' cdna.fa | cut -d ' ' -f 1 | cut -c 2- > mrna.ids
|
|
216
|
+
|
|
217
|
+
$ retrieve_seq -i cdna.fa -n mrna.ids -o mrna.fa
|
|
218
|
+
|
|
219
|
+
$ bowtie-build mrna.fa mrna
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
#### 2.5 使用 bowtie 创建 rRNA 索引
|
|
223
|
+
```bash
|
|
224
|
+
$ cd /mnt/t64/test/sce/1.reference/rrna
|
|
225
|
+
|
|
226
|
+
# filter the rrna sequence
|
|
227
|
+
$ grep -i 'gbkey=rRNA' cdna.fa | cut -d ' ' -f 1 | cut -c 2- > rrna.ids
|
|
228
|
+
|
|
229
|
+
$ retrieve_seq -i cdna.fa -n rrna.ids -o rrna.fa
|
|
230
|
+
|
|
231
|
+
$ bowtie-build rrna.fa rrna
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
#### 2.6 使用 bowtie 创建 tRNA 索引
|
|
235
|
+
```bash
|
|
236
|
+
$ cd /mnt/t64/test/sce/1.reference/trna
|
|
237
|
+
|
|
238
|
+
# filter the trna sequence
|
|
239
|
+
$ grep -i 'gbkey=tRNA' cdna.fa | cut -d ' ' -f 1 | cut -c 2- > trna.ids
|
|
240
|
+
|
|
241
|
+
$ retrieve_seq -i cdna.fa -n trna.ids -o trna.fa
|
|
242
|
+
|
|
243
|
+
$ bowtie-build trna.fa trna
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
#### 2.7 使用 bowtie 创建 ncRNA 索引
|
|
248
|
+
```bash
|
|
249
|
+
$ cd /mnt/t64/test/sce/1.reference/ncrna
|
|
250
|
+
|
|
251
|
+
# filter the ncrna sequence
|
|
252
|
+
$ grep -iE 'gbkey=ncRNA|gbkey=lnc_RNA|gbkey=miRNA|gbkey=snoRNA|gbkey=snRNA|gbkey=misc_RNA' cdna.fa | cut -d ' ' -f 1 | cut -c 2- > ncrna.ids
|
|
253
|
+
|
|
254
|
+
$ retrieve_seq -i cdna.fa -n ncrna.ids -o ncrna.fa
|
|
255
|
+
|
|
256
|
+
$ bowtie-build ncrna.fa ncrna
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
#### 2.8 标准化 gtf 文件
|
|
260
|
+
```bash
|
|
261
|
+
$ cd /mnt/t64/test/sce/1.reference/norm/
|
|
262
|
+
|
|
263
|
+
$ rpf_Reference \
|
|
264
|
+
-g ../GCF_000146045.2_R64_genomic.fna \
|
|
265
|
+
-t ../GCF_000146045.2_R64_genomic.gff \
|
|
266
|
+
-u 30 -o sce
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
#### 2.9 使用 star 创建 genome 索引
|
|
270
|
+
```bash
|
|
271
|
+
$ cd /mnt/t64/test/sce/1.reference/
|
|
272
|
+
|
|
273
|
+
$ STAR \
|
|
274
|
+
--genomeSAindexNbases 11 \
|
|
275
|
+
--runThreadN 12 \
|
|
276
|
+
--runMode genomeGenerate \
|
|
277
|
+
--genomeDir star-index \
|
|
278
|
+
--genomeFastaFiles GCF_000146045.2_R64_genomic.fna \
|
|
279
|
+
--sjdbGTFfile ./norm/sce.norm.gtf
|
|
280
|
+
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
#### 2.10 使用 rsem 创建 transcriptome 索引
|
|
284
|
+
```bash
|
|
285
|
+
$ cd /mnt/t64/test/sce/1.reference/rsem-index/
|
|
286
|
+
|
|
287
|
+
$ rsem-prepare-reference \
|
|
288
|
+
-p 10 \
|
|
289
|
+
--gtf ../norm/sce.norm.gtf ../GCF_000146045.2_R64_genomic.fna sce
|
|
290
|
+
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
## 3. 示例
|
|
295
|
+
为了展示 RiboParser 的分析流程和使用方法,这里使用数据集 GSE67387 的 RNA-seq 和 Ribo-seq 数据做示例。
|
|
296
|
+
|
|
297
|
+
```shell
|
|
298
|
+
# dataset
|
|
299
|
+
https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE67387
|
|
300
|
+
|
|
301
|
+
# reference
|
|
302
|
+
Nedialkova DD, Leidel SA. Optimization of Codon Translation Rates via tRNA Modifications Maintains Proteome Integrity. Cell 2015 Jun 18;161(7):1606-18.
|
|
303
|
+
PMID: 26052047
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
### 3.1 GSE67387 数据基础分析
|
|
308
|
+
|
|
309
|
+
#### 3.1.1 下载原始数据
|
|
310
|
+
1. 下载 RNA-seq 数据
|
|
311
|
+
使用 `sra-tools` 中的 `prefetch` 下载原始的 sra 格式数据,并解压为 fastq 格式文件。
|
|
312
|
+
```bash
|
|
313
|
+
$ cd /mnt/t64/test/sce/2.rawdata/rna-seq/
|
|
314
|
+
|
|
315
|
+
#################################################
|
|
316
|
+
# download rna-seq
|
|
317
|
+
$ prefetch -o SRR1944925.sra SRR1944925
|
|
318
|
+
$ prefetch -o SRR1944926.sra SRR1944926
|
|
319
|
+
$ prefetch -o SRR1944927.sra SRR1944927
|
|
320
|
+
$ prefetch -o SRR1944928.sra SRR1944928
|
|
321
|
+
$ prefetch -o SRR1944929.sra SRR1944929
|
|
322
|
+
$ prefetch -o SRR1944930.sra SRR1944930
|
|
323
|
+
$ prefetch -o SRR1944931.sra SRR1944931
|
|
324
|
+
$ prefetch -o SRR1944932.sra SRR1944932
|
|
325
|
+
$ prefetch -o SRR1944933.sra SRR1944933
|
|
326
|
+
$ prefetch -o SRR1944934.sra SRR1944934
|
|
327
|
+
$ prefetch -o SRR1944935.sra SRR1944935
|
|
328
|
+
|
|
329
|
+
# decompression
|
|
330
|
+
for sra in *.sra
|
|
331
|
+
do
|
|
332
|
+
fastq-dump $sra
|
|
333
|
+
pigz *fastq
|
|
334
|
+
done
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
2. 下载 Ribo-seq 数据
|
|
338
|
+
```bash
|
|
339
|
+
cd /mnt/t64/test/sce/2.rawdata/ribo-seq/
|
|
340
|
+
|
|
341
|
+
#################################################
|
|
342
|
+
# download ribo-seq
|
|
343
|
+
prefetch -o SRR1944912.sra SRR1944912
|
|
344
|
+
prefetch -o SRR1944913.sra SRR1944913
|
|
345
|
+
prefetch -o SRR1944914.sra SRR1944914
|
|
346
|
+
prefetch -o SRR1944915.sra SRR1944915
|
|
347
|
+
prefetch -o SRR1944916.sra SRR1944916
|
|
348
|
+
prefetch -o SRR1944917.sra SRR1944917
|
|
349
|
+
prefetch -o SRR1944918.sra SRR1944918
|
|
350
|
+
prefetch -o SRR1944919.sra SRR1944919
|
|
351
|
+
prefetch -o SRR1944920.sra SRR1944920
|
|
352
|
+
prefetch -o SRR1944921.sra SRR1944921
|
|
353
|
+
prefetch -o SRR1944922.sra SRR1944922
|
|
354
|
+
prefetch -o SRR1944923.sra SRR1944923
|
|
355
|
+
|
|
356
|
+
# decompression
|
|
357
|
+
for sra in *.sra
|
|
358
|
+
do
|
|
359
|
+
fastq-dump $sra
|
|
360
|
+
pigz *fastq
|
|
361
|
+
done
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
#### 3.1.2 数据清洗
|
|
366
|
+
因为该项目提供的原始数据是清洗后的,所以并不包含接头序列,这里只展示通用步骤。
|
|
367
|
+
|
|
368
|
+
1. 清洗 RNA-seq 数据
|
|
369
|
+
```bash
|
|
370
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/1.cleandata/
|
|
371
|
+
|
|
372
|
+
#################################################
|
|
373
|
+
# run the cutadapt
|
|
374
|
+
for fq in /mnt/t64/test/sce/2.rawdata/rna-seq/*fastq.gz
|
|
375
|
+
do
|
|
376
|
+
cutadapt --match-read-wildcards \
|
|
377
|
+
-a AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGC \
|
|
378
|
+
-m 10 -O 6 -j 10 \
|
|
379
|
+
-o `\basename $fq fastq.gz`clean.fastq.gz $fq &> $fq".log"
|
|
380
|
+
done
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
2. 清洗 Ribo-seq 数据
|
|
384
|
+
```bash
|
|
385
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/1.cleandata/
|
|
386
|
+
|
|
387
|
+
#################################################
|
|
388
|
+
# run the cutadapt
|
|
389
|
+
for fq in /mnt/t64/test/sce/2.rawdata/ribo-seq/*fastq.gz
|
|
390
|
+
do
|
|
391
|
+
cutadapt --match-read-wildcards \
|
|
392
|
+
-a AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGC \
|
|
393
|
+
-m 10 -O 6 -j 10 \
|
|
394
|
+
-o `\basename $fq fastq.gz`clean.fastq.gz $fq &> $fq".log"
|
|
395
|
+
done
|
|
396
|
+
```
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
#### 3.1.3 把 clean data 比对到不同类型的参考文件
|
|
400
|
+
为了确定文库的质量,排除不同 ncRNA 来源的 reads 对后续分析的影响,这里使用 `bowtie` 对数据进行分类。
|
|
401
|
+
正常情况下,尤其是使用 oligo(dT) 方法构建的 RNA-seq 文库,其中的 reads 大多来源于 mRNA。所以对于 RNA-seq 的分析而言,这个步骤不是必须的。
|
|
402
|
+
|
|
403
|
+
1. 比对 RNA-seq 数据
|
|
404
|
+
```bash
|
|
405
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/2.bowtie/
|
|
406
|
+
|
|
407
|
+
#################################################
|
|
408
|
+
# set database
|
|
409
|
+
rrna='/mnt/t64/test/sce/1.reference/rrna/rrna'
|
|
410
|
+
trna='/mnt/t64/test/sce/1.reference/trna/trna'
|
|
411
|
+
ncrna='/mnt/t64/test/sce/1.reference/ncrna/ncrna'
|
|
412
|
+
mrna='/mnt/t64/test/sce/1.reference/mrna/mrna'
|
|
413
|
+
chrom='/mnt/t64/test/sce/1.reference/genome/genome'
|
|
414
|
+
|
|
415
|
+
# alignment reads to reference
|
|
416
|
+
for fq in /mnt/t64/test/sce/3.rna-seq/1.cleandata/*fastq.gz
|
|
417
|
+
do
|
|
418
|
+
fqname=`\basename $fq .fastq.gz`
|
|
419
|
+
|
|
420
|
+
## rrna
|
|
421
|
+
bowtie -p 10 -v 1 --un="$fqname".norrna.fq --al="$fqname".rrna.fq \
|
|
422
|
+
-x $rrna $fq -S "$fqname".rrna.sam 2>> "$fqname".log
|
|
423
|
+
|
|
424
|
+
## trna
|
|
425
|
+
bowtie -p 10 -v 1 --un="$fqname".notrna.fq --al="$fqname".trna.fq \
|
|
426
|
+
-x $trna "$fqname".norrna.fq -S "$fqname".trna.sam 2>> "$fqname".log
|
|
427
|
+
|
|
428
|
+
## ncrna
|
|
429
|
+
bowtie -p 10 -v 1 --un="$fqname".noncrna.fq --al="$fqname".ncrna.fq \
|
|
430
|
+
-x $ncrna "$fqname".notrna.fq -S "$fqname".ncrna.sam 2>> "$fqname".log
|
|
431
|
+
|
|
432
|
+
## mrna
|
|
433
|
+
bowtie -p 10 -v 1 --un="$fqname".nomrna.fq --al="$fqname".mrna.fq \
|
|
434
|
+
-x $mrna "$fqname".noncrna.fq -S "$fqname".mrna.sam 2>> "$fqname".log
|
|
435
|
+
|
|
436
|
+
## genome
|
|
437
|
+
bowtie -p 10 -v 1 --un="$fqname".nogenome.fq --al="$fqname".genome.fq \
|
|
438
|
+
-x $chrom "$fqname".nomrna.fq -S "$fqname".genome.sam 2>> "$fqname".log
|
|
439
|
+
|
|
440
|
+
## compress fastq
|
|
441
|
+
pigz *fq
|
|
442
|
+
|
|
443
|
+
## compress sam
|
|
444
|
+
for sam in *.sam
|
|
445
|
+
do
|
|
446
|
+
samtools view -h -F 4 $sam | samtools sort -@ 10 -o `\basename $sam sam`bam
|
|
447
|
+
rm $sam
|
|
448
|
+
done
|
|
449
|
+
|
|
450
|
+
done
|
|
451
|
+
```
|
|
452
|
+
|
|
453
|
+
2. 统计所有数据库的比对结果
|
|
454
|
+
```bash
|
|
455
|
+
#################################################
|
|
456
|
+
# merge all log files
|
|
457
|
+
merge_bwt_log \
|
|
458
|
+
-n rRNA,tRNA,ncRNA,mRNA,Genome \
|
|
459
|
+
-l *log -o sce
|
|
460
|
+
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
3. 比对 Ribo-seq 数据
|
|
464
|
+
```bash
|
|
465
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/2.bowtie/
|
|
466
|
+
|
|
467
|
+
#################################################
|
|
468
|
+
# set database
|
|
469
|
+
rrna='/mnt/t64/test/sce/1.reference/rrna/rrna'
|
|
470
|
+
trna='/mnt/t64/test/sce/1.reference/trna/trna'
|
|
471
|
+
ncrna='/mnt/t64/test/sce/1.reference/ncrna/ncrna'
|
|
472
|
+
mrna='/mnt/t64/test/sce/1.reference/mrna/mrna'
|
|
473
|
+
chrom='/mnt/t64/test/sce/1.reference/genome/genome'
|
|
474
|
+
|
|
475
|
+
# alignment reads to reference
|
|
476
|
+
for fq in /mnt/t64/test/sce/4.ribo-seq/1.cleandata/*fastq.gz
|
|
477
|
+
do
|
|
478
|
+
fqname=`\basename $fq .fastq.gz`
|
|
479
|
+
|
|
480
|
+
## rrna
|
|
481
|
+
bowtie -p 10 -v 1 --un="$fqname".norrna.fq --al="$fqname".rrna.fq \
|
|
482
|
+
-x $rrna $fq -S "$fqname".rrna.sam 2>> "$fqname".log
|
|
483
|
+
|
|
484
|
+
## trna
|
|
485
|
+
bowtie -p 10 -v 1 --un="$fqname".notrna.fq --al="$fqname".trna.fq \
|
|
486
|
+
-x $trna "$fqname".norrna.fq -S "$fqname".trna.sam 2>> "$fqname".log
|
|
487
|
+
|
|
488
|
+
## ncrna
|
|
489
|
+
bowtie -p 10 -v 1 --un="$fqname".noncrna.fq --al="$fqname".ncrna.fq \
|
|
490
|
+
-x $ncrna "$fqname".notrna.fq -S "$fqname".ncrna.sam 2>> "$fqname".log
|
|
491
|
+
|
|
492
|
+
## mrna
|
|
493
|
+
bowtie -p 10 -v 1 --un="$fqname".nomrna.fq --al="$fqname".mrna.fq \
|
|
494
|
+
-x $mrna "$fqname".noncrna.fq -S "$fqname".mrna.sam 2>> "$fqname".log
|
|
495
|
+
|
|
496
|
+
## genome
|
|
497
|
+
bowtie -p 10 -v 1 --un="$fqname".nogenome.fq --al="$fqname".genome.fq \
|
|
498
|
+
-x $chrom "$fqname".nomrna.fq -S "$fqname".genome.sam 2>> "$fqname".log
|
|
499
|
+
|
|
500
|
+
## compress fastq
|
|
501
|
+
pigz *fq
|
|
502
|
+
|
|
503
|
+
## compress sam
|
|
504
|
+
for sam in *.sam
|
|
505
|
+
do
|
|
506
|
+
samtools view -h -F 4 $sam | samtools sort -@ 10 -o `\basename $sam sam`bam
|
|
507
|
+
rm $sam
|
|
508
|
+
done
|
|
509
|
+
|
|
510
|
+
done
|
|
511
|
+
|
|
512
|
+
```
|
|
513
|
+
|
|
514
|
+
4. 统计所有数据库的比对结果
|
|
515
|
+
```bash
|
|
516
|
+
#################################################
|
|
517
|
+
# merge all log files
|
|
518
|
+
merge_bwt_log \
|
|
519
|
+
-n rRNA,tRNA,ncRNA,mRNA,Genome \
|
|
520
|
+
-l *log -o sce
|
|
521
|
+
|
|
522
|
+
```
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
#### 3.1.4 使用 STAR 比对 mRNA 的 reads
|
|
526
|
+
去除掉 ncRNA 的 reads 之后,使用 star 重新比对到酵母的基因组。
|
|
527
|
+
|
|
528
|
+
1. 使用 star 比对 RNA-seq 的数据
|
|
529
|
+
```bash
|
|
530
|
+
cd /mnt/t64/test/sce/3.rna-seq/3.star/
|
|
531
|
+
|
|
532
|
+
#################################################
|
|
533
|
+
# set the option and database
|
|
534
|
+
genome='/mnt/t64/test/sce/1.reference/star-index/'
|
|
535
|
+
|
|
536
|
+
#################################################
|
|
537
|
+
# map the all rna-seq reads to genome and transcriptome region
|
|
538
|
+
for fastq in /mnt/t64/test/sce/3.rna-seq/2.bowtie/*.noncrna.fq.gz
|
|
539
|
+
do
|
|
540
|
+
|
|
541
|
+
## get file name
|
|
542
|
+
output=$(basename $fastq .noncrna.fq.gz)
|
|
543
|
+
|
|
544
|
+
#################################################
|
|
545
|
+
## run the alignment
|
|
546
|
+
STAR --runThreadN 10 \
|
|
547
|
+
--readFilesCommand zcat \
|
|
548
|
+
--genomeDir $genome \
|
|
549
|
+
--readFilesIn $fastq \
|
|
550
|
+
--outFileNamePrefix $output \
|
|
551
|
+
--outSAMtype BAM Unsorted \
|
|
552
|
+
--outFilterType BySJout \
|
|
553
|
+
--quantMode TranscriptomeSAM GeneCounts \
|
|
554
|
+
--outReadsUnmapped Fastx \
|
|
555
|
+
--outSAMattributes All \
|
|
556
|
+
--alignEndsType Local \
|
|
557
|
+
--outFilterMultimapNmax 3 \
|
|
558
|
+
--outFilterMismatchNmax 1 \
|
|
559
|
+
--alignIntronMax 10000 \
|
|
560
|
+
--outFilterMatchNmin 20
|
|
561
|
+
# --outWigType wiggle --outWigNorm RPM
|
|
562
|
+
|
|
563
|
+
pigz *mate1
|
|
564
|
+
|
|
565
|
+
#################################################
|
|
566
|
+
## sort the bam file
|
|
567
|
+
samtools sort -@ 10 $output"Aligned.out.bam" -o $output"Aligned.sortedByCoord.out.bam"
|
|
568
|
+
samtools index -@ 10 $output"Aligned.sortedByCoord.out.bam"
|
|
569
|
+
rm $output"Aligned.out.bam"
|
|
570
|
+
|
|
571
|
+
done
|
|
572
|
+
```
|
|
573
|
+
|
|
574
|
+
2. 使用 star 比对 Ribo-seq 的数据
|
|
575
|
+
```bash
|
|
576
|
+
cd /mnt/t64/test/sce/4.ribo-seq/3.star/
|
|
577
|
+
|
|
578
|
+
#################################################
|
|
579
|
+
# set the option and database
|
|
580
|
+
genome='/mnt/t64/test/sce/1.reference/star-index/'
|
|
581
|
+
|
|
582
|
+
#################################################
|
|
583
|
+
# map all the ribo-seq reads to the genome and transcriptome regions
|
|
584
|
+
for fastq in /mnt/t64/test/sce/4.ribo-seq/2.bowtie/*.noncrna.fq.gz
|
|
585
|
+
do
|
|
586
|
+
|
|
587
|
+
## get file name
|
|
588
|
+
output=$(basename $fastq .noncrna.fq.gz)
|
|
589
|
+
|
|
590
|
+
#################################################
|
|
591
|
+
## run the alignment
|
|
592
|
+
STAR --runThreadN 10 \
|
|
593
|
+
--readFilesCommand zcat \
|
|
594
|
+
--genomeDir $genome \
|
|
595
|
+
--readFilesIn $fastq \
|
|
596
|
+
--outFileNamePrefix $output \
|
|
597
|
+
--outSAMtype BAM Unsorted \
|
|
598
|
+
--outFilterType BySJout \
|
|
599
|
+
--quantMode TranscriptomeSAM GeneCounts \
|
|
600
|
+
--outReadsUnmapped Fastx \
|
|
601
|
+
--outSAMattributes All \
|
|
602
|
+
--alignEndsType Local \
|
|
603
|
+
--outFilterMultimapNmax 3 \
|
|
604
|
+
--outFilterMismatchNmax 1 \
|
|
605
|
+
--alignIntronMax 10000 \
|
|
606
|
+
--outFilterMatchNmin 20
|
|
607
|
+
# --outWigType wiggle --outWigNorm RPM
|
|
608
|
+
|
|
609
|
+
pigz *mate1
|
|
610
|
+
|
|
611
|
+
#################################################
|
|
612
|
+
## sort the bam file
|
|
613
|
+
samtools sort -@ 10 $output"Aligned.out.bam" -o $output"Aligned.sortedByCoord.out.bam"
|
|
614
|
+
samtools index -@ 10 $output"Aligned.sortedByCoord.out.bam"
|
|
615
|
+
rm $output"Aligned.out.bam"
|
|
616
|
+
|
|
617
|
+
done
|
|
618
|
+
```
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
#### 3.1.5 使用 RSEM 或者 featureCounts 定量基因表达水平
|
|
622
|
+
我们可以使用 RSEM 或者 featureCounts 来对基因的表达水平进行定量,二者各有特色,这里使用 RSEM 做示例。
|
|
623
|
+
|
|
624
|
+
1. 定量 RNA-seq 的转录水平
|
|
625
|
+
```bash
|
|
626
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/4.quantification/
|
|
627
|
+
|
|
628
|
+
#################################################
|
|
629
|
+
# quantify the gene expression
|
|
630
|
+
for bam in /mnt/t64/test/sce/3.rna-seq/3.star/*Aligned.toTranscriptome.out.bam
|
|
631
|
+
do
|
|
632
|
+
rsem-calculate-expression -p 10 --no-bam-output --alignments -q $bam /mnt/t64/test/sce/1.reference/rsem-index/sce `\basename $bam Aligned.toTranscriptome.out.bam`
|
|
633
|
+
# rsem-calculate-expression -p 10 --paired-end --no-bam-output --alignments -q $bam /mnt/t64/test/sce/1.reference/rsem-index/sce `\basename $bam Aligned.toTranscriptome.out.bam`
|
|
634
|
+
done
|
|
635
|
+
```
|
|
636
|
+
|
|
637
|
+
2. 合并 RNA-seq 的数据定量结果
|
|
638
|
+
```bash
|
|
639
|
+
#################################################
|
|
640
|
+
# merge the gene expression
|
|
641
|
+
merge_rsem -c expected_count -l *.genes.results -o gene.expected_count.txt
|
|
642
|
+
merge_rsem -c TPM -l *.genes.results -o gene.TPM.txt
|
|
643
|
+
merge_rsem -c FPKM -l *.genes.results -o gene.FPKM.txt
|
|
644
|
+
|
|
645
|
+
#################################################
|
|
646
|
+
# merge the isoforms expression
|
|
647
|
+
merge_rsem -c expected_count -l *.isoforms.results -o isoforms.expected_count.txt
|
|
648
|
+
merge_rsem -c TPM -l *.isoforms.results -o isoforms.TPM.txt
|
|
649
|
+
merge_rsem -c FPKM -l *.isoforms.results -o isoforms.FPKM.txt
|
|
650
|
+
|
|
651
|
+
```
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
3. 定量 Ribo-seq 的翻译水平
|
|
655
|
+
```bash
|
|
656
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/4.quantification/
|
|
657
|
+
|
|
658
|
+
#################################################
|
|
659
|
+
# quantify the isoforms expression
|
|
660
|
+
for bam in /mnt/t64/test/sce/4.ribo-seq/3.star/*Aligned.toTranscriptome.out.bam
|
|
661
|
+
do
|
|
662
|
+
rsem-calculate-expression -p 10 --no-bam-output --alignments -q $bam /mnt/t64/test/sce/1.reference/rsem-index/sce `\basename $bam Aligned.toTranscriptome.out.bam`
|
|
663
|
+
# rsem-calculate-expression -p 10 --paired-end --no-bam-output --alignments -q $bam /mnt/t64/test/sce/1.reference/rsem-index/sce `\basename $bam Aligned.toTranscriptome.out.bam`
|
|
664
|
+
done
|
|
665
|
+
```
|
|
666
|
+
|
|
667
|
+
4. 合并 Ribo-seq 的数据定量结果
|
|
668
|
+
```bash
|
|
669
|
+
#################################################
|
|
670
|
+
# merge the gene expression
|
|
671
|
+
merge_rsem -c expected_count -l *.genes.results -o gene.expected_count.txt
|
|
672
|
+
merge_rsem -c TPM -l *.genes.results -o gene.TPM.txt
|
|
673
|
+
merge_rsem -c FPKM -l *.genes.results -o gene.FPKM.txt
|
|
674
|
+
|
|
675
|
+
#################################################
|
|
676
|
+
# merge the isoforms expression
|
|
677
|
+
merge_rsem -c expected_count -l *.isoforms.results -o isoforms.expected_count.txt
|
|
678
|
+
merge_rsem -c TPM -l *.isoforms.results -o isoforms.TPM.txt
|
|
679
|
+
merge_rsem -c FPKM -l *.isoforms.results -o isoforms.FPKM.txt
|
|
680
|
+
|
|
681
|
+
```
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
### 3.2 使用 RiboParser 继续完成 GSE67387 的数据分析
|
|
685
|
+
#### 3.2.1 测序数据的质量检查
|
|
686
|
+
1. 检查 Ribo-seq 数据的测序质量
|
|
687
|
+
```bash
|
|
688
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/01.qc/
|
|
689
|
+
|
|
690
|
+
#################################################
|
|
691
|
+
# check the ribo-seq quality
|
|
692
|
+
for bam in /mnt/t64/test/sce/4.ribo-seq/3.star/*Aligned.toTranscriptome.out.bam
|
|
693
|
+
do
|
|
694
|
+
prefix_name=$(basename $bam Aligned.toTranscriptome.out.bam)
|
|
695
|
+
|
|
696
|
+
rpf_Check -b $bam -s --thread 10 -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
697
|
+
-o $prefix_name &> $prefix_name".log"
|
|
698
|
+
|
|
699
|
+
done
|
|
700
|
+
```
|
|
701
|
+
|
|
702
|
+
2. 合并所有样本的质量分析结果
|
|
703
|
+
```bash
|
|
704
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/
|
|
705
|
+
|
|
706
|
+
#################################################
|
|
707
|
+
# merge the ribo-seq quality results
|
|
708
|
+
merge_length -l ./01.qc/*length_distribution.txt -o sce
|
|
709
|
+
merge_saturation -l ./01.qc/*gene_saturation.txt -o sce
|
|
710
|
+
|
|
711
|
+
```
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
3. 检查 RNA-seq 数据的测序质量
|
|
715
|
+
```bash
|
|
716
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/01.qc/
|
|
717
|
+
|
|
718
|
+
#################################################
|
|
719
|
+
# check the rna-seq quality
|
|
720
|
+
for bam in /mnt/t64/test/sce/3.rna-seq/3.star/*Aligned.toTranscriptome.out.bam
|
|
721
|
+
do
|
|
722
|
+
prefix_name=$(basename $bam Aligned.toTranscriptome.out.bam)
|
|
723
|
+
|
|
724
|
+
rpf_Check -b $bam -s --thread 10 -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
725
|
+
-o $prefix_name &> $prefix_name".log"
|
|
726
|
+
|
|
727
|
+
done
|
|
728
|
+
```
|
|
729
|
+
|
|
730
|
+
4. 合并所有样本的质量分析结果
|
|
731
|
+
```bash
|
|
732
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/
|
|
733
|
+
|
|
734
|
+
#################################################
|
|
735
|
+
# merge the rna-seq quality results
|
|
736
|
+
merge_length -l ./01.qc/*length_distribution.txt -o sce
|
|
737
|
+
merge_saturation -l ./01.qc/*gene_saturation.txt -o sce
|
|
738
|
+
|
|
739
|
+
```
|
|
740
|
+
|
|
741
|
+
|
|
742
|
+
#### 3.2.2 测序数据的酶切和酶连的偏好性
|
|
743
|
+
1. 检查 Ribo-seq 数据的酶切和酶连的偏好性
|
|
744
|
+
```bash
|
|
745
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/02.digestion/
|
|
746
|
+
|
|
747
|
+
#################################################
|
|
748
|
+
# check the reads digestion
|
|
749
|
+
for bam in /mnt/t64/test/sce/4.ribo-seq/3.star/01.qc/*.bam
|
|
750
|
+
do
|
|
751
|
+
prefix_name=$(basename $bam .bam)
|
|
752
|
+
|
|
753
|
+
rpf_Digest -b $bam -m 27 -M 33 --scale \
|
|
754
|
+
-s /mnt/t64/test/sce/1.reference/norm/sce.norm.rna.fa \
|
|
755
|
+
-t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
756
|
+
-o $prefix_name &> $prefix_name".log"
|
|
757
|
+
|
|
758
|
+
done
|
|
759
|
+
```
|
|
760
|
+
|
|
761
|
+
2. 合并所有样本的 reads digestion
|
|
762
|
+
```bash
|
|
763
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/
|
|
764
|
+
|
|
765
|
+
#################################################
|
|
766
|
+
# merge the rpf digestion
|
|
767
|
+
merge_digestion -l ./02.digestion/*pwm.txt -o sce
|
|
768
|
+
|
|
769
|
+
```
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
3. 检查 RNA-seq 数据的酶切和酶连的偏好性
|
|
773
|
+
```bash
|
|
774
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/02.digestion/
|
|
775
|
+
|
|
776
|
+
#################################################
|
|
777
|
+
# check the reads digestion
|
|
778
|
+
for bam in /mnt/t64/test/sce/3.rna-seq/3.star/01.qc/*.bam
|
|
779
|
+
do
|
|
780
|
+
prefix_name=$(basename $bam .bam)
|
|
781
|
+
|
|
782
|
+
rpf_Digest -b $bam -m 25 -M 50 --scale \
|
|
783
|
+
-s /mnt/t64/test/sce/1.reference/norm/sce.norm.rna.fa \
|
|
784
|
+
-t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
785
|
+
-o $prefix_name &> $prefix_name".log"
|
|
786
|
+
|
|
787
|
+
done
|
|
788
|
+
```
|
|
789
|
+
|
|
790
|
+
4. 合并所有样本的 reads digestion
|
|
791
|
+
```bash
|
|
792
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/
|
|
793
|
+
|
|
794
|
+
#################################################
|
|
795
|
+
# merge the rpf digestion
|
|
796
|
+
merge_digestion -l ./02.digestion/*pwm.txt -o sce
|
|
797
|
+
|
|
798
|
+
```
|
|
799
|
+
|
|
800
|
+
|
|
801
|
+
#### 3.2.3 使用 RiboParser 预测最佳 offset
|
|
802
|
+
1. 预测 Ribo-seq 中的最佳 offset
|
|
803
|
+
```bash
|
|
804
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/03.offset/
|
|
805
|
+
|
|
806
|
+
#################################################
|
|
807
|
+
# predict the offset table
|
|
808
|
+
for bam in /mnt/t64/test/sce/4.ribo-seq/3.star/01.qc/*.bam
|
|
809
|
+
do
|
|
810
|
+
prefix_name=$(basename $bam .bam)
|
|
811
|
+
|
|
812
|
+
rpf_Offset -b $bam -m 27 -M 33 -p 30 -d \
|
|
813
|
+
--mode RSBM \
|
|
814
|
+
-t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
815
|
+
-o $prefix_name &> $prefix_name".log"
|
|
816
|
+
|
|
817
|
+
done
|
|
818
|
+
```
|
|
819
|
+
|
|
820
|
+
2. 合并所有样本的 offset 预测结果
|
|
821
|
+
```bash
|
|
822
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/
|
|
823
|
+
|
|
824
|
+
#################################################
|
|
825
|
+
# merge the ribo-seq offset results
|
|
826
|
+
merge_offset_detail -l ./03.offset/*end.txt -o sce
|
|
827
|
+
merge_offset -l ./03.offset/*sscbm_offset.txt -o sce_sscbm
|
|
828
|
+
merge_offset -l ./03.offset/*rsbm_offset.txt -o sce_rsbm
|
|
829
|
+
|
|
830
|
+
```
|
|
831
|
+
|
|
832
|
+
3. RNA-seq 无需预测 offset,这里直接创建一个文件,其中 offset 值均为 12。
|
|
833
|
+
```bash
|
|
834
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/03.offset/
|
|
835
|
+
|
|
836
|
+
#################################################
|
|
837
|
+
# set the offset table
|
|
838
|
+
for bam in /mnt/t64/test/sce/3.rna-seq/3.star/01.qc/*.bam
|
|
839
|
+
do
|
|
840
|
+
|
|
841
|
+
prefix_name=$(basename $bam .bam)
|
|
842
|
+
rna_Offset -m 27 -M 50 -e 12 -o $prefix_name &> $prefix_name".log"
|
|
843
|
+
|
|
844
|
+
done
|
|
845
|
+
```
|
|
846
|
+
|
|
847
|
+
|
|
848
|
+
#### 3.2.4 把 bam 文件中的 reads 转换为 txt 文件中的 density。
|
|
849
|
+
1. 转换 Ribo-seq 数据
|
|
850
|
+
```bash
|
|
851
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/04.density/
|
|
852
|
+
|
|
853
|
+
#################################################
|
|
854
|
+
# convert the rpf to density
|
|
855
|
+
for bam in /mnt/t64/test/sce/4.ribo-seq/3.star/01.qc/*.bam
|
|
856
|
+
do
|
|
857
|
+
prefix_name=$(basename $bam .bam)
|
|
858
|
+
|
|
859
|
+
rpf_Density -b $bam -m 27 -M 33 --period 40 -l --thread 10 \
|
|
860
|
+
-p /mnt/t64/test/sce/4.ribo-seq/3.star/03.offset/$prefix_name"_rsbm_offset.txt" \
|
|
861
|
+
-s /mnt/t64/test/sce/1.reference/norm/sce.norm.rna.fa \
|
|
862
|
+
-t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
863
|
+
-o $prefix_name &> $prefix_name".log"
|
|
864
|
+
|
|
865
|
+
done
|
|
866
|
+
|
|
867
|
+
```
|
|
868
|
+
|
|
869
|
+
2. 转换 RNA-seq 数据
|
|
870
|
+
```bash
|
|
871
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/04.density/
|
|
872
|
+
|
|
873
|
+
#################################################
|
|
874
|
+
# convert the reads to density
|
|
875
|
+
for bam in /mnt/t64/test/sce/3.rna-seq/3.star/01.qc/*.bam
|
|
876
|
+
do
|
|
877
|
+
prefix_name=$(basename $bam .bam)
|
|
878
|
+
|
|
879
|
+
rna_Density -b $bam -m 27 -M 33 --period 40 -l --thread 10 \
|
|
880
|
+
-p /mnt/t64/test/sce/3.rna-seq/3.star/03.offset/$prefix_name"_offset.txt" \
|
|
881
|
+
-s /mnt/t64/test/sce/1.reference/norm/sce.norm.rna.fa \
|
|
882
|
+
-t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
883
|
+
-o $prefix_name &> $prefix_name".log"
|
|
884
|
+
|
|
885
|
+
done
|
|
886
|
+
|
|
887
|
+
```
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
#### 3.2.5 合并所有文件
|
|
891
|
+
1. 合并 Ribo-seq density 文件
|
|
892
|
+
```bash
|
|
893
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/
|
|
894
|
+
|
|
895
|
+
#################################################
|
|
896
|
+
# create the samples file: RPF.file.list
|
|
897
|
+
merge_dst_list -l ../04.density/*_rpf.txt -o RPF.file.list
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
cat RPF.file.list
|
|
901
|
+
|
|
902
|
+
Name File Type
|
|
903
|
+
wt_ribo_YPD1 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944912_rpf.txt Ribo
|
|
904
|
+
wt_ribo_YPD2 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944913_rpf.txt Ribo
|
|
905
|
+
wt_ribo_YPD3 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944914_rpf.txt Ribo
|
|
906
|
+
ncs2d_ribo_YPD1 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944915_rpf.txt Ribo
|
|
907
|
+
ncs2d_ribo_YPD2 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944916_rpf.txt Ribo
|
|
908
|
+
ncs2d_ribo_YPD3 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944917_rpf.txt Ribo
|
|
909
|
+
elp6d_ribo_YPD1 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944918_rpf.txt Ribo
|
|
910
|
+
elp6d_ribo_YPD2 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944919_rpf.txt Ribo
|
|
911
|
+
elp6d_ribo_YPD3 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944920_rpf.txt Ribo
|
|
912
|
+
ncs2d_elp6d_ribo_YPD1 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944921_rpf.txt Ribo
|
|
913
|
+
ncs2d_elp6d_ribo_YPD2 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944922_rpf.txt Ribo
|
|
914
|
+
ncs2d_elp6d_ribo_YPD3 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944923_rpf.txt Ribo
|
|
915
|
+
|
|
916
|
+
#################################################
|
|
917
|
+
# merge all the Ribo-seq files
|
|
918
|
+
rpf_Merge -l RPF.file.list -o sce_rpf &> sce.log
|
|
919
|
+
|
|
920
|
+
```
|
|
921
|
+
|
|
922
|
+
2. 合并 RNA-seq density 文件
|
|
923
|
+
```bash
|
|
924
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/
|
|
925
|
+
|
|
926
|
+
#################################################
|
|
927
|
+
# create the samples file: RNA.file.list
|
|
928
|
+
merge_dst_list -l ../04.density/*_rna.txt -o RNA.file.list
|
|
929
|
+
|
|
930
|
+
cat RNA.file.list
|
|
931
|
+
|
|
932
|
+
Name File Type
|
|
933
|
+
wt_rna_YPD1 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944912_rna.txt RNA
|
|
934
|
+
wt_rna_YPD2 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944913_rna.txt RNA
|
|
935
|
+
wt_rna_YPD3 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944914_rna.txt RNA
|
|
936
|
+
ncs2d_rna_YPD1 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944915_rna.txt RNA
|
|
937
|
+
ncs2d_rna_YPD2 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944916_rna.txt RNA
|
|
938
|
+
ncs2d_rna_YPD3 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944917_rna.txt RNA
|
|
939
|
+
elp6d_rna_YPD1 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944918_rna.txt RNA
|
|
940
|
+
elp6d_rna_YPD2 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944919_rna.txt RNA
|
|
941
|
+
elp6d_rna_YPD3 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944920_rna.txt RNA
|
|
942
|
+
ncs2d_elp6d_rna_YPD1 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944921_rna.txt RNA
|
|
943
|
+
ncs2d_elp6d_rna_YPD2 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944922_rna.txt RNA
|
|
944
|
+
ncs2d_elp6d_rna_YPD3 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944923_rna.txt RNA
|
|
945
|
+
|
|
946
|
+
#################################################
|
|
947
|
+
# merge all the RNA-seq files
|
|
948
|
+
rpf_Merge -l RNA.file.list -o sce_rna &> sce.log
|
|
949
|
+
|
|
950
|
+
```
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
#### 3.2.6 计算三核苷酸周期性
|
|
954
|
+
1. 检查 Ribo-seq 数据三核苷酸周期性
|
|
955
|
+
```bash
|
|
956
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/06.periodicity/
|
|
957
|
+
|
|
958
|
+
#################################################
|
|
959
|
+
# check the periodicity
|
|
960
|
+
rpf_Periodicity \
|
|
961
|
+
-r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
962
|
+
-m 30 --tis 0 --tts 0 -o sce &> sce.log
|
|
963
|
+
|
|
964
|
+
```
|
|
965
|
+
|
|
966
|
+
2. 检查 RNA-seq 数据三核苷酸周期性
|
|
967
|
+
```bash
|
|
968
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/06.periodicity/
|
|
969
|
+
|
|
970
|
+
#################################################
|
|
971
|
+
# check the periodicity
|
|
972
|
+
rpf_Periodicity \
|
|
973
|
+
-r /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
|
|
974
|
+
-m 30 --tis 0 --tts 0 -o sce &> sce.log
|
|
975
|
+
|
|
976
|
+
```
|
|
977
|
+
|
|
978
|
+
|
|
979
|
+
#### 3.2.7 起始和终止密码子前后的 meta-gene 分析
|
|
980
|
+
1. Ribo-seq 数据 meta-gene 分析
|
|
981
|
+
```bash
|
|
982
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/07.metaplot/
|
|
983
|
+
|
|
984
|
+
#################################################
|
|
985
|
+
# metagene analysis
|
|
986
|
+
rpf_Metaplot \
|
|
987
|
+
-t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
988
|
+
-r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
989
|
+
-m 50 --mode bar -o sce &> sce.log
|
|
990
|
+
|
|
991
|
+
```
|
|
992
|
+
|
|
993
|
+
2. RNA-seq 数据 meta-gene 分析
|
|
994
|
+
```bash
|
|
995
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/07.metaplot/
|
|
996
|
+
|
|
997
|
+
#################################################
|
|
998
|
+
# metagene analysis
|
|
999
|
+
rpf_Metaplot \
|
|
1000
|
+
-t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
1001
|
+
-r /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
|
|
1002
|
+
-m 50 --mode bar -o sce &> sce.log
|
|
1003
|
+
|
|
1004
|
+
```
|
|
1005
|
+
|
|
1006
|
+
|
|
1007
|
+
#### 3.2.8 检查基因上的整体 density 覆盖情况
|
|
1008
|
+
1. 检查 Ribo-seq 数据的 density 覆盖
|
|
1009
|
+
```bash
|
|
1010
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/08.coverage/
|
|
1011
|
+
|
|
1012
|
+
#################################################
|
|
1013
|
+
# check the rpf density along with the gene body
|
|
1014
|
+
rpf_Coverage \
|
|
1015
|
+
-t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
1016
|
+
-r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
1017
|
+
-m 50 --outlier \
|
|
1018
|
+
-b 10,100,10 \
|
|
1019
|
+
-n --heat \
|
|
1020
|
+
-o sce &> sce.log
|
|
1021
|
+
|
|
1022
|
+
```
|
|
1023
|
+
|
|
1024
|
+
2. 检查 RNA-seq 数据的 density 覆盖
|
|
1025
|
+
```bash
|
|
1026
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/08.coverage/
|
|
1027
|
+
|
|
1028
|
+
#################################################
|
|
1029
|
+
# check the reads density along with the gene body
|
|
1030
|
+
rpf_Coverage \
|
|
1031
|
+
-t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
1032
|
+
-r /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
|
|
1033
|
+
-m 50 --outlier \
|
|
1034
|
+
-b 10,100,10 \
|
|
1035
|
+
-n --heat \
|
|
1036
|
+
-o sce &> sce.log
|
|
1037
|
+
|
|
1038
|
+
```
|
|
1039
|
+
|
|
1040
|
+
|
|
1041
|
+
#### 3.2.9 检查样本之间的重复性
|
|
1042
|
+
1. 检查 Ribo-seq 数据样本重复性
|
|
1043
|
+
```bash
|
|
1044
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/09.correlation/
|
|
1045
|
+
|
|
1046
|
+
#################################################
|
|
1047
|
+
# calculate the correlation between Ribo-seq sample replicates
|
|
1048
|
+
rpf_Corr \
|
|
1049
|
+
-r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
1050
|
+
-o sce &> sce.log
|
|
1051
|
+
|
|
1052
|
+
```
|
|
1053
|
+
|
|
1054
|
+
2. 检查 RNA-seq 数据的重复性
|
|
1055
|
+
```bash
|
|
1056
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/09.correlation/
|
|
1057
|
+
|
|
1058
|
+
#################################################
|
|
1059
|
+
# calculate the correlation between RNA-seq sample replicates
|
|
1060
|
+
rpf_Corr \
|
|
1061
|
+
-r /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
|
|
1062
|
+
-o sce &> sce.log
|
|
1063
|
+
|
|
1064
|
+
```
|
|
1065
|
+
|
|
1066
|
+
|
|
1067
|
+
#### 3.2.10 基因表达和翻译水平定量
|
|
1068
|
+
1. 计算基因的翻译量(RPFs level)
|
|
1069
|
+
```bash
|
|
1070
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/10.quantification/
|
|
1071
|
+
|
|
1072
|
+
#################################################
|
|
1073
|
+
# quantify the gene expression
|
|
1074
|
+
rpf_Quant \
|
|
1075
|
+
-r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
1076
|
+
--tis 15 \
|
|
1077
|
+
--tts 5 \
|
|
1078
|
+
-o sce &> sce.log
|
|
1079
|
+
|
|
1080
|
+
```
|
|
1081
|
+
|
|
1082
|
+
|
|
1083
|
+
#### 3.2.11 计算密码子水平的 pausing score
|
|
1084
|
+
1. 计算 Ribo-seq 数据中密码子水平的 pausing score
|
|
1085
|
+
```bash
|
|
1086
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/11.pausing_score/
|
|
1087
|
+
|
|
1088
|
+
#################################################
|
|
1089
|
+
# calculate the codon pausing score of E/P/A site
|
|
1090
|
+
for sites in E P A
|
|
1091
|
+
do
|
|
1092
|
+
rpf_Pausing \
|
|
1093
|
+
-l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
1094
|
+
-r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
1095
|
+
-b 0 --stop \
|
|
1096
|
+
-m 30 \
|
|
1097
|
+
-s $sites \
|
|
1098
|
+
-f 0 \
|
|
1099
|
+
--scale minmax \
|
|
1100
|
+
-o "$sites"_site &> "$sites"_site.log
|
|
1101
|
+
done
|
|
1102
|
+
|
|
1103
|
+
```
|
|
1104
|
+
|
|
1105
|
+
|
|
1106
|
+
#### 3.2.12 计算密码子水平的 occupancy
|
|
1107
|
+
1. 计算 Ribo-seq 数据中密码子水平的 occupancy
|
|
1108
|
+
```bash
|
|
1109
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/12.codon_occupancy/
|
|
1110
|
+
|
|
1111
|
+
#################################################
|
|
1112
|
+
# calculate the codon occupancy of E/P/A site
|
|
1113
|
+
for sites in E P A
|
|
1114
|
+
do
|
|
1115
|
+
rpf_Occupancy \
|
|
1116
|
+
-l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
1117
|
+
-r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
1118
|
+
-m 30 \
|
|
1119
|
+
-s "$sites" \
|
|
1120
|
+
-f 0 --stop \
|
|
1121
|
+
--scale minmax \
|
|
1122
|
+
-o "$sites"_site &> "$sites"_site.log
|
|
1123
|
+
done
|
|
1124
|
+
|
|
1125
|
+
```
|
|
1126
|
+
|
|
1127
|
+
|
|
1128
|
+
#### 3.2.13 计算密码子水平的 decoding time
|
|
1129
|
+
1. 计算 Ribo-seq 数据中密码子水平的 decoding time
|
|
1130
|
+
```bash
|
|
1131
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/13.codon_decoding_time/
|
|
1132
|
+
|
|
1133
|
+
#################################################
|
|
1134
|
+
# calculate the codon decoding time of E/P/A site
|
|
1135
|
+
for sites in E P A
|
|
1136
|
+
do
|
|
1137
|
+
rpf_CDT \
|
|
1138
|
+
-l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
1139
|
+
--rna /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
|
|
1140
|
+
--rpf /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
1141
|
+
--stop \
|
|
1142
|
+
-m 50 \
|
|
1143
|
+
-f 0 \
|
|
1144
|
+
-s $sites \
|
|
1145
|
+
--tis 10 \
|
|
1146
|
+
--tts 5 \
|
|
1147
|
+
-o "$sites"_site &> "$sites"_site.log
|
|
1148
|
+
done
|
|
1149
|
+
|
|
1150
|
+
```
|
|
1151
|
+
|
|
1152
|
+
|
|
1153
|
+
#### 3.2.14 计算密码子水平的 selection time
|
|
1154
|
+
1. 计算 Ribo-seq 数据中密码子水平的 selection time
|
|
1155
|
+
```bash
|
|
1156
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/14.codon_selection_time/
|
|
1157
|
+
|
|
1158
|
+
#################################################
|
|
1159
|
+
# calculate the codon selection time of E/P/A site
|
|
1160
|
+
for sites in E P A
|
|
1161
|
+
do
|
|
1162
|
+
rpf_CST \
|
|
1163
|
+
-l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
1164
|
+
--rna /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
|
|
1165
|
+
--rpf /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
1166
|
+
--stop \
|
|
1167
|
+
-m 50 \
|
|
1168
|
+
-f 0 \
|
|
1169
|
+
-s $sites \
|
|
1170
|
+
--tis 10 \
|
|
1171
|
+
--tts 5 \
|
|
1172
|
+
-o "$sites"_site &> "$sites"_site.log
|
|
1173
|
+
done
|
|
1174
|
+
|
|
1175
|
+
```
|
|
1176
|
+
|
|
1177
|
+
|
|
1178
|
+
#### 3.2.15 计算基因和密码子水平的变异系数
|
|
1179
|
+
1. 计算 Ribo-seq 数据中基因和密码子水平的变异系数
|
|
1180
|
+
```bash
|
|
1181
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/15.coefficient_of_variation/
|
|
1182
|
+
|
|
1183
|
+
#################################################
|
|
1184
|
+
# Here we can configure the design file to calculate differences between different groups.
|
|
1185
|
+
$ cat design.txt
|
|
1186
|
+
name group
|
|
1187
|
+
WT_ribo_YPD1 WT_ribo_YPD
|
|
1188
|
+
WT_ribo_YPD2 WT_ribo_YPD
|
|
1189
|
+
WT_ribo_YPD3 WT_ribo_YPD
|
|
1190
|
+
ncs2d_ribo_YPD1 ncs2d_ribo_YPD
|
|
1191
|
+
ncs2d_ribo_YPD2 ncs2d_ribo_YPD
|
|
1192
|
+
ncs2d_ribo_YPD3 ncs2d_ribo_YPD
|
|
1193
|
+
elp6d_ribo_YPD1 elp6d_ribo_YPD
|
|
1194
|
+
elp6d_ribo_YPD2 elp6d_ribo_YPD
|
|
1195
|
+
elp6d_ribo_YPD3 elp6d_ribo_YPD
|
|
1196
|
+
ncs2d_elp6d_ribo_YPD1 ncs2d_elp6d_ribo_YPD
|
|
1197
|
+
ncs2d_elp6d_ribo_YPD2 ncs2d_elp6d_ribo_YPD
|
|
1198
|
+
ncs2d_elp6d_ribo_YPD3 ncs2d_elp6d_ribo_YPD
|
|
1199
|
+
|
|
1200
|
+
#################################################
|
|
1201
|
+
# calculate the coefficient of variation
|
|
1202
|
+
rpf_CoV \
|
|
1203
|
+
-l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
1204
|
+
-r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
1205
|
+
-f 0 \
|
|
1206
|
+
-m 30 \
|
|
1207
|
+
--tis 10 \
|
|
1208
|
+
--tts 5 \
|
|
1209
|
+
--fig \
|
|
1210
|
+
-g design.txt \
|
|
1211
|
+
-o sce &> sce.log
|
|
1212
|
+
|
|
1213
|
+
```
|
|
1214
|
+
|
|
1215
|
+
#### 3.2.16 密码子 meta-codon 分析
|
|
1216
|
+
1. 计算 Ribo-seq 数据中密码子 meta density
|
|
1217
|
+
```bash
|
|
1218
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/16.meta_codon/
|
|
1219
|
+
|
|
1220
|
+
#################################################
|
|
1221
|
+
# Here we can configure the codon list.
|
|
1222
|
+
$ cat codon_list.txt
|
|
1223
|
+
AAA
|
|
1224
|
+
AAC
|
|
1225
|
+
AAG
|
|
1226
|
+
AAT
|
|
1227
|
+
AAGAAG
|
|
1228
|
+
ATGATG
|
|
1229
|
+
CCCGGG
|
|
1230
|
+
...
|
|
1231
|
+
|
|
1232
|
+
|
|
1233
|
+
#################################################
|
|
1234
|
+
# codon meta analysis
|
|
1235
|
+
rpf_Meta_Codon \
|
|
1236
|
+
-r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
1237
|
+
-m 50 -f 0 \
|
|
1238
|
+
-c codon_list.txt \
|
|
1239
|
+
-a 15 -u -n --fig \
|
|
1240
|
+
-o sce &> sce.log
|
|
1241
|
+
|
|
1242
|
+
```
|
|
1243
|
+
|
|
1244
|
+
#### 3.2.17 Data shuffling
|
|
1245
|
+
1. 重新洗牌 Ribo-seq 数据的 gene density 文件
|
|
1246
|
+
```bash
|
|
1247
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/17.shuffle/
|
|
1248
|
+
|
|
1249
|
+
#################################################
|
|
1250
|
+
# shuffle the gene density
|
|
1251
|
+
rpf_Shuffle \
|
|
1252
|
+
-l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
1253
|
+
-r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
1254
|
+
-s 0 \
|
|
1255
|
+
-i \
|
|
1256
|
+
-o sce &> sce.log
|
|
1257
|
+
|
|
1258
|
+
```
|
|
1259
|
+
|
|
1260
|
+
2. 重新洗牌 RNA-seq 数据的 gene density 文件
|
|
1261
|
+
```bash
|
|
1262
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/11.shuffle/
|
|
1263
|
+
|
|
1264
|
+
#################################################
|
|
1265
|
+
# shuffle the gene density
|
|
1266
|
+
rpf_Shuffle \
|
|
1267
|
+
-l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
1268
|
+
-r /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
|
|
1269
|
+
-s 0 \
|
|
1270
|
+
-i \
|
|
1271
|
+
-o sce &> sce.log
|
|
1272
|
+
|
|
1273
|
+
```
|
|
1274
|
+
|
|
1275
|
+
#### 3.2.18 提取 gene density
|
|
1276
|
+
1. 提取和格式化 Ribo-seq 数据中的 gene density
|
|
1277
|
+
```bash
|
|
1278
|
+
$ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/18.gene_density/
|
|
1279
|
+
|
|
1280
|
+
#################################################
|
|
1281
|
+
# retrieve and format the gene density
|
|
1282
|
+
rpf_Retrieve \
|
|
1283
|
+
-l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
1284
|
+
-r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
|
|
1285
|
+
-m 0 \
|
|
1286
|
+
-f \
|
|
1287
|
+
-n \
|
|
1288
|
+
-o sce &> sce.log
|
|
1289
|
+
|
|
1290
|
+
```
|
|
1291
|
+
|
|
1292
|
+
2. 提取和格式化 RNA-seq 数据中的 gene density
|
|
1293
|
+
```bash
|
|
1294
|
+
$ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/12.gene_density/
|
|
1295
|
+
|
|
1296
|
+
#################################################
|
|
1297
|
+
# retrieve and format the gene density
|
|
1298
|
+
rpf_Retrieve \
|
|
1299
|
+
-l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
|
|
1300
|
+
-r /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
|
|
1301
|
+
-m 0 \
|
|
1302
|
+
-f \
|
|
1303
|
+
-n \
|
|
1304
|
+
-o sce &> sce.log
|
|
1305
|
+
|
|
1306
|
+
```
|
|
1307
|
+
|
|
1308
|
+
|
|
1309
|
+
## 4. 贡献
|
|
1310
|
+
|
|
1311
|
+
欢迎提交问题和贡献代码
|
|
1312
|
+
联系 rensc0718@163.com
|
|
1313
|
+
|
|
1314
|
+
## 5. 许可证
|
|
1315
|
+
|
|
1316
|
+
本项目可免费用于学术研究,不得用于商业用途。
|