RiboParser 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. riboparser-0.1.3/PKG-INFO +1316 -0
  2. riboparser-0.1.3/README.md +1280 -0
  3. riboparser-0.1.3/RiboParser.egg-info/PKG-INFO +1316 -0
  4. riboparser-0.1.3/RiboParser.egg-info/SOURCES.txt +130 -0
  5. riboparser-0.1.3/RiboParser.egg-info/dependency_links.txt +1 -0
  6. riboparser-0.1.3/RiboParser.egg-info/entry_points.txt +73 -0
  7. riboparser-0.1.3/RiboParser.egg-info/requires.txt +17 -0
  8. riboparser-0.1.3/RiboParser.egg-info/top_level.txt +2 -0
  9. riboparser-0.1.3/scripts/__init__.py +8 -0
  10. riboparser-0.1.3/scripts/bam/__init__.py +0 -0
  11. riboparser-0.1.3/scripts/bam/flt_bam_threads.py +73 -0
  12. riboparser-0.1.3/scripts/bam/test_flt.py +54 -0
  13. riboparser-0.1.3/scripts/bedgraph/__init__.py +0 -0
  14. riboparser-0.1.3/scripts/bedgraph/bg2meta.py +90 -0
  15. riboparser-0.1.3/scripts/bedgraph/rpm_smooth.py +270 -0
  16. riboparser-0.1.3/scripts/bedgraph/site2base.py +47 -0
  17. riboparser-0.1.3/scripts/bowtie/__init__.py +0 -0
  18. riboparser-0.1.3/scripts/bowtie/merge_bwt_log.py +138 -0
  19. riboparser-0.1.3/scripts/fasta/__init__.py +0 -0
  20. riboparser-0.1.3/scripts/fasta/fa_gc_sum.py +99 -0
  21. riboparser-0.1.3/scripts/fasta/fa_len_flt.py +90 -0
  22. riboparser-0.1.3/scripts/fasta/fa_len_sum.py +114 -0
  23. riboparser-0.1.3/scripts/fasta/fa_split.py +110 -0
  24. riboparser-0.1.3/scripts/fasta/line_feed.py +75 -0
  25. riboparser-0.1.3/scripts/fasta/nt2aa.py +152 -0
  26. riboparser-0.1.3/scripts/fasta/rand_seq.py +105 -0
  27. riboparser-0.1.3/scripts/fasta/retrieve_seq.py +90 -0
  28. riboparser-0.1.3/scripts/fasta/revs.py +76 -0
  29. riboparser-0.1.3/scripts/fastq/__init__.py +0 -0
  30. riboparser-0.1.3/scripts/fastq/fq2fa.py +86 -0
  31. riboparser-0.1.3/scripts/fastq/fq2txt.py +72 -0
  32. riboparser-0.1.3/scripts/fastq/fq_cutting.py +88 -0
  33. riboparser-0.1.3/scripts/fastq/fq_len_flt.py +84 -0
  34. riboparser-0.1.3/scripts/fastq/fq_len_sum.py +84 -0
  35. riboparser-0.1.3/scripts/fastq/fq_split.py +94 -0
  36. riboparser-0.1.3/scripts/fastq/fq_trim.py +86 -0
  37. riboparser-0.1.3/scripts/fastq/phred_quality.py +96 -0
  38. riboparser-0.1.3/scripts/merge_ribo/__init__.py +0 -0
  39. riboparser-0.1.3/scripts/merge_ribo/merge_cdt.py +105 -0
  40. riboparser-0.1.3/scripts/merge_ribo/merge_coverage.py +91 -0
  41. riboparser-0.1.3/scripts/merge_ribo/merge_cst.py +103 -0
  42. riboparser-0.1.3/scripts/merge_ribo/merge_digestion.py +121 -0
  43. riboparser-0.1.3/scripts/merge_ribo/merge_dst_list.py +108 -0
  44. riboparser-0.1.3/scripts/merge_ribo/merge_length.py +112 -0
  45. riboparser-0.1.3/scripts/merge_ribo/merge_metagene.py +93 -0
  46. riboparser-0.1.3/scripts/merge_ribo/merge_occupancy.py +104 -0
  47. riboparser-0.1.3/scripts/merge_ribo/merge_odd_ratio.py +93 -0
  48. riboparser-0.1.3/scripts/merge_ribo/merge_offset.py +117 -0
  49. riboparser-0.1.3/scripts/merge_ribo/merge_offset_detail.py +162 -0
  50. riboparser-0.1.3/scripts/merge_ribo/merge_offset_end.py +152 -0
  51. riboparser-0.1.3/scripts/merge_ribo/merge_pausing.py +109 -0
  52. riboparser-0.1.3/scripts/merge_ribo/merge_period.py +92 -0
  53. riboparser-0.1.3/scripts/merge_ribo/merge_quant.py +91 -0
  54. riboparser-0.1.3/scripts/merge_ribo/merge_saturation.py +100 -0
  55. riboparser-0.1.3/scripts/oligo/__init__.py +0 -0
  56. riboparser-0.1.3/scripts/oligo/get_overlap_seq.py +85 -0
  57. riboparser-0.1.3/scripts/oligo/get_tissue_freq.py +133 -0
  58. riboparser-0.1.3/scripts/oligo/get_win_seq.py +54 -0
  59. riboparser-0.1.3/scripts/ribocode/__init__.py +0 -0
  60. riboparser-0.1.3/scripts/ribocode/ribocode_bed_format.py +254 -0
  61. riboparser-0.1.3/scripts/ribotish/__init__.py +0 -0
  62. riboparser-0.1.3/scripts/ribotish/ribotish_format.py +327 -0
  63. riboparser-0.1.3/scripts/rsem/__init__.py +0 -0
  64. riboparser-0.1.3/scripts/rsem/merge_rsem.py +76 -0
  65. riboparser-0.1.3/scripts/unix/__init__.py +0 -0
  66. riboparser-0.1.3/scripts/unix/dos2unix.py +54 -0
  67. riboparser-0.1.3/setup.cfg +4 -0
  68. riboparser-0.1.3/setup.py +141 -0
  69. riboparser-0.1.3/utils/__init__.py +8 -0
  70. riboparser-0.1.3/utils/make_ensb_ref.py +308 -0
  71. riboparser-0.1.3/utils/make_ribo_ref.py +39 -0
  72. riboparser-0.1.3/utils/ribo/ArgsParser.py +1294 -0
  73. riboparser-0.1.3/utils/ribo/Bam2Wig.py +292 -0
  74. riboparser-0.1.3/utils/ribo/BamFilter.py +90 -0
  75. riboparser-0.1.3/utils/ribo/CDT.py +298 -0
  76. riboparser-0.1.3/utils/ribo/CST.py +662 -0
  77. riboparser-0.1.3/utils/ribo/Codon.py +206 -0
  78. riboparser-0.1.3/utils/ribo/Coefficient_of_Variation.py +340 -0
  79. riboparser-0.1.3/utils/ribo/Coverage.py +394 -0
  80. riboparser-0.1.3/utils/ribo/Cumulative_CoV.py +252 -0
  81. riboparser-0.1.3/utils/ribo/Density.py +226 -0
  82. riboparser-0.1.3/utils/ribo/Digestion.py +296 -0
  83. riboparser-0.1.3/utils/ribo/Ensembl_Ref.py +272 -0
  84. riboparser-0.1.3/utils/ribo/GenePred.py +504 -0
  85. riboparser-0.1.3/utils/ribo/MetaCodon.py +462 -0
  86. riboparser-0.1.3/utils/ribo/Metaplot.py +275 -0
  87. riboparser-0.1.3/utils/ribo/Occupancy.py +293 -0
  88. riboparser-0.1.3/utils/ribo/Odd_Ratio.py +538 -0
  89. riboparser-0.1.3/utils/ribo/Offset.py +626 -0
  90. riboparser-0.1.3/utils/ribo/Offset_RSBM.py +647 -0
  91. riboparser-0.1.3/utils/ribo/Pausing.py +451 -0
  92. riboparser-0.1.3/utils/ribo/Periodicity.py +169 -0
  93. riboparser-0.1.3/utils/ribo/Quality.py +668 -0
  94. riboparser-0.1.3/utils/ribo/Quant.py +385 -0
  95. riboparser-0.1.3/utils/ribo/RNA.py +383 -0
  96. riboparser-0.1.3/utils/ribo/RPFs.py +314 -0
  97. riboparser-0.1.3/utils/ribo/Retrieve.py +146 -0
  98. riboparser-0.1.3/utils/ribo/Ribo.py +482 -0
  99. riboparser-0.1.3/utils/ribo/Shuffle.py +148 -0
  100. riboparser-0.1.3/utils/ribo/__init__.py +6 -0
  101. riboparser-0.1.3/utils/ribo_parser.py +88 -0
  102. riboparser-0.1.3/utils/rna_Density.py +40 -0
  103. riboparser-0.1.3/utils/rna_Offset.py +48 -0
  104. riboparser-0.1.3/utils/rpf_Bam2bw.py +40 -0
  105. riboparser-0.1.3/utils/rpf_Bam_Filter.py +27 -0
  106. riboparser-0.1.3/utils/rpf_CDT.py +40 -0
  107. riboparser-0.1.3/utils/rpf_CST.py +42 -0
  108. riboparser-0.1.3/utils/rpf_Check.py +51 -0
  109. riboparser-0.1.3/utils/rpf_CoV.py +43 -0
  110. riboparser-0.1.3/utils/rpf_Corr.py +144 -0
  111. riboparser-0.1.3/utils/rpf_Coverage.py +42 -0
  112. riboparser-0.1.3/utils/rpf_Cumulative_CoV.py +39 -0
  113. riboparser-0.1.3/utils/rpf_Density.py +39 -0
  114. riboparser-0.1.3/utils/rpf_Digest.py +39 -0
  115. riboparser-0.1.3/utils/rpf_Geneplot.py +174 -0
  116. riboparser-0.1.3/utils/rpf_Merge.py +66 -0
  117. riboparser-0.1.3/utils/rpf_Meta_Codon.py +44 -0
  118. riboparser-0.1.3/utils/rpf_Metaplot.py +34 -0
  119. riboparser-0.1.3/utils/rpf_Occupancy.py +35 -0
  120. riboparser-0.1.3/utils/rpf_Odd_Ratio.py +41 -0
  121. riboparser-0.1.3/utils/rpf_Offset.py +54 -0
  122. riboparser-0.1.3/utils/rpf_Offset_RSBM.py +40 -0
  123. riboparser-0.1.3/utils/rpf_Pausing.py +47 -0
  124. riboparser-0.1.3/utils/rpf_Periodicity.py +35 -0
  125. riboparser-0.1.3/utils/rpf_Quant.py +44 -0
  126. riboparser-0.1.3/utils/rpf_Reference.py +39 -0
  127. riboparser-0.1.3/utils/rpf_Retrieve.py +36 -0
  128. riboparser-0.1.3/utils/rpf_Shuffle.py +31 -0
  129. riboparser-0.1.3/utils/rpf_end.py +210 -0
  130. riboparser-0.1.3/utils/serp_overlap.py +126 -0
  131. riboparser-0.1.3/utils/serp_peak.py +39 -0
  132. riboparser-0.1.3/utils/serp_properties.py +31 -0
@@ -0,0 +1,1316 @@
1
+ Metadata-Version: 2.1
2
+ Name: RiboParser
3
+ Version: 0.1.3
4
+ Summary: A pipeline for ribosome profiling data analysis
5
+ Home-page: https://github.com/renscq/RiboParser
6
+ Author: Ren Shuchao
7
+ Author-email: rensc0718@163.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.12
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: numpy~=1.26.4
14
+ Requires-Dist: pandas~=2.2.2
15
+ Requires-Dist: pyarrow~=16.1.0
16
+ Requires-Dist: polars~=0.20.31
17
+ Requires-Dist: biopython~=1.78
18
+ Requires-Dist: scipy~=1.12.0
19
+ Requires-Dist: scikit-learn~=1.4.2
20
+ Requires-Dist: statsmodels~=0.14.2
21
+ Requires-Dist: pysam~=0.22.1
22
+ Requires-Dist: joblib~=1.4.2
23
+ Requires-Dist: interval~=1.0.0
24
+ Requires-Dist: matplotlib~=3.8.4
25
+ Requires-Dist: matplotlib-venn~=1.1.1
26
+ Requires-Dist: seaborn~=0.13.2
27
+ Requires-Dist: plotly~=5.22.0
28
+ Requires-Dist: seqlogo~=5.29.9
29
+ Requires-Dist: kaleido~=0.2.1
30
+
31
+ <!--
32
+ * @Author: 'rensc' 'rensc0718@163.com'
33
+ * @Date: 2024-10-15 11:44:58
34
+ * @LastEditors: 'rensc' 'rensc0718@163.com'
35
+ * @LastEditTime: 2024-10-20 07:06:36
36
+ * @FilePath: \RiboParser\README.md
37
+ *
38
+ -->
39
+
40
+ # RiboParser
41
+
42
+ 为了便于理解和使用,这里对公开的项目数据进行分析,并拆解每一个分析步骤,来展示完整的工作流程。
43
+ 这个过程包括了通用的分析步骤,以及定制的 `RiboParser` 和 `RiboShiny` 的分析和可视化步骤。
44
+
45
+ 1. 软件的安装
46
+ 2. 参考文件的创建
47
+ 3. 原始数据的下载
48
+ 4. 原始数据清洗
49
+ 5. 数据比对
50
+ 6. 测序质量分析
51
+ 7. 基因水平分析
52
+ 8. 密码子水平分析
53
+
54
+ 以上的数据分析输出的结果可以在 `RiboShiny` 中进行下游的分析和可视化。
55
+
56
+
57
+ ## 1. 软件的安装
58
+
59
+ ### 1. conda 创建环境
60
+ ```bash
61
+ conda create -n ribo
62
+ conda activate ribo
63
+ ```
64
+
65
+ ### 2. conda 安装软件依赖
66
+ ```bash
67
+ conda install cutadapt
68
+ conda install bowtie
69
+ conda install samtools
70
+ conda install star
71
+ conda install bedtools
72
+ conda install subread
73
+ conda install rsem
74
+ conda install pigz
75
+ conda install gffread
76
+ conda install sra-tools
77
+ conda install ucsc-genepredtogtf
78
+ conda install ucsc-gtftogenepred
79
+ conda install ucsc-gff3togenepred
80
+ conda install ucsc-bedgraphtobigwig
81
+ conda install ucsc-bedsort
82
+ ```
83
+
84
+ ### 3. conda 安装 RiboParser
85
+ ```bash
86
+ conda install riboparser -c rensc
87
+ ```
88
+
89
+ ### 4. 测试安装状态:
90
+ 测试软件的依赖、安装和运行问题。
91
+
92
+ ```bash
93
+ rpf_test
94
+ ```
95
+
96
+ ## 2. 准备参考文件
97
+
98
+ ### 1. 完整项目目录示例如下:
99
+
100
+ 完整的数据分析包含了参考文件的准备、RNA-seq 的数据分析、Ribo-seq 的数据分析。
101
+
102
+ ```
103
+ $ cd /mnt/t64/test/sce/
104
+ $ tree
105
+
106
+ .
107
+ ├── 1.reference
108
+ │   ├── cdna
109
+ │   ├── genome
110
+ │   ├── gtf
111
+ │   ├── mrna
112
+ │   ├── norm
113
+ │   ├── ncrna
114
+ │   ├── rrna
115
+ │   ├── rsem-index
116
+ │   ├── star-index
117
+ │   └── trna
118
+ ├── 2.rawdata
119
+ │   ├── rna-seq
120
+ │   └── ribo-seq
121
+ ├── 3.rna-seq
122
+ │   ├── 1.cleandata
123
+ │   ├── 2.bowtie
124
+ │   ├── 3.star
125
+ │   ├── 4.quantification
126
+ │   └── 5.riboparser
127
+ │      ├── 01.qc
128
+ │      ├── 03.offset
129
+ │      ├── 04.density
130
+ │      ├── 05.merge
131
+ │      ├── 06.periodicity
132
+ │      ├── 07.metaplot
133
+ │      ├── 08.coverage
134
+ │      ├── 09.correlation
135
+ │      ├── 10.shuffle
136
+ │      └── 11.gene_density
137
+ ├── 4.ribo-seq
138
+ │   ├── 1.cleandata
139
+ │   ├── 2.bowtie
140
+ │   ├── 3.star
141
+ │   ├── 4.quantification
142
+ │   └── 5.riboparser
143
+ │      ├── 01.qc
144
+ │      ├── 02.digestion
145
+ │      ├── 03.offset
146
+ │      ├── 04.density
147
+ │      ├── 05.merge
148
+ │      ├── 06.periodicity
149
+ │      ├── 07.metaplot
150
+ │      ├── 08.coverage
151
+ │      ├── 09.correlation
152
+ │      ├── 10.quantification
153
+ │      ├── 11.pausing_score
154
+ │      ├── 12.codon_occupancy
155
+ │      ├── 13.codon_decoding_time
156
+ │      ├── 14.codon_selection_time
157
+ │      ├── 15.coefficient_of_variation
158
+ │      ├── 16.meta_codon
159
+ │      ├── 17.shuffle
160
+ │      └── 18.gene_density
161
+ └── 5.test
162
+
163
+ ```
164
+
165
+ ### 2. 准备参考基因组索引
166
+
167
+ #### 2.1. 创建目录
168
+
169
+ 创建文件夹用于放置不同类型的参考序列文件。
170
+
171
+ ```bash
172
+ $ cd /mnt/t64/test/sce/1.reference/
173
+
174
+ $ mkdir cdna genome gtf mrna ncrna rrna trna norm rsem-index
175
+ ```
176
+
177
+ #### 2.2 从 NCBI 下载参考文件
178
+
179
+ 使用最常用的数据分析文件格式,基因组序列为 fasta 格式,参考文件为 GTF 或者 GFF3 格式。
180
+
181
+ ```bash
182
+ # genome sequence
183
+ $ wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/146/045/GCF_000146045.2_R64/GCF_000146045.2_R64_genomic.fna.gz
184
+
185
+ # GTF or GFF3
186
+ $ wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/146/045/GCF_000146045.2_R64/GCF_000146045.2_R64_genomic.gtf.gz
187
+ $ wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/146/045/GCF_000146045.2_R64/GCF_000146045.2_R64_genomic.gff.gz
188
+
189
+ # cDNA sequence
190
+ $ wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/146/045/GCF_000146045.2_R64/GCF_000146045.2_R64_rna.fna.gz
191
+
192
+ # feature table
193
+ $ wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/146/045/GCF_000146045.2_R64/GCF_000146045.2_R64_feature_table.txt.gz
194
+
195
+ # decompression
196
+ $ gunzip *.gz
197
+
198
+ $ gffread -g GCF_000146045.2_R64_genomic.fna GCF_000146045.2_R64_genomic.gff -F -w cdna.fa
199
+ ```
200
+
201
+ #### 2.3 使用 bowtie 创建 genome 索引
202
+
203
+ ```bash
204
+ $ cd /mnt/t64/test/sce/1.reference/genome
205
+
206
+ $ bowtie-build ../GCF_000146045.2_R64_genomic.fna genome
207
+ ```
208
+
209
+ #### 2.4 使用 bowtie 创建 mRNA 索引
210
+
211
+ ```bash
212
+ $ cd mrna
213
+
214
+ # filter the mrna sequence
215
+ $ grep -i 'gbkey=mRNA' cdna.fa | cut -d ' ' -f 1 | cut -c 2- > mrna.ids
216
+
217
+ $ retrieve_seq -i cdna.fa -n mrna.ids -o mrna.fa
218
+
219
+ $ bowtie-build mrna.fa mrna
220
+ ```
221
+
222
+ #### 2.5 使用 bowtie 创建 rRNA 索引
223
+ ```bash
224
+ $ cd /mnt/t64/test/sce/1.reference/rrna
225
+
226
+ # filter the rrna sequence
227
+ $ grep -i 'gbkey=rRNA' cdna.fa | cut -d ' ' -f 1 | cut -c 2- > rrna.ids
228
+
229
+ $ retrieve_seq -i cdna.fa -n rrna.ids -o rrna.fa
230
+
231
+ $ bowtie-build rrna.fa rrna
232
+ ```
233
+
234
+ #### 2.6 使用 bowtie 创建 tRNA 索引
235
+ ```bash
236
+ $ cd /mnt/t64/test/sce/1.reference/trna
237
+
238
+ # filter the trna sequence
239
+ $ grep -i 'gbkey=tRNA' cdna.fa | cut -d ' ' -f 1 | cut -c 2- > trna.ids
240
+
241
+ $ retrieve_seq -i cdna.fa -n trna.ids -o trna.fa
242
+
243
+ $ bowtie-build trna.fa trna
244
+ ```
245
+
246
+
247
+ #### 2.7 使用 bowtie 创建 ncRNA 索引
248
+ ```bash
249
+ $ cd /mnt/t64/test/sce/1.reference/ncrna
250
+
251
+ # filter the ncrna sequence
252
+ $ grep -iE 'gbkey=ncRNA|gbkey=lnc_RNA|gbkey=miRNA|gbkey=snoRNA|gbkey=snRNA|gbkey=misc_RNA' cdna.fa | cut -d ' ' -f 1 | cut -c 2- > ncrna.ids
253
+
254
+ $ retrieve_seq -i cdna.fa -n ncrna.ids -o ncrna.fa
255
+
256
+ $ bowtie-build ncrna.fa ncrna
257
+ ```
258
+
259
+ #### 2.8 标准化 gtf 文件
260
+ ```bash
261
+ $ cd /mnt/t64/test/sce/1.reference/norm/
262
+
263
+ $ rpf_Reference \
264
+ -g ../GCF_000146045.2_R64_genomic.fna \
265
+ -t ../GCF_000146045.2_R64_genomic.gff \
266
+ -u 30 -o sce
267
+ ```
268
+
269
+ #### 2.9 使用 star 创建 genome 索引
270
+ ```bash
271
+ $ cd /mnt/t64/test/sce/1.reference/
272
+
273
+ $ STAR \
274
+ --genomeSAindexNbases 11 \
275
+ --runThreadN 12 \
276
+ --runMode genomeGenerate \
277
+ --genomeDir star-index \
278
+ --genomeFastaFiles GCF_000146045.2_R64_genomic.fna \
279
+ --sjdbGTFfile ./norm/sce.norm.gtf
280
+
281
+ ```
282
+
283
+ #### 2.10 使用 rsem 创建 transcriptome 索引
284
+ ```bash
285
+ $ cd /mnt/t64/test/sce/1.reference/rsem-index/
286
+
287
+ $ rsem-prepare-reference \
288
+ -p 10 \
289
+ --gtf ../norm/sce.norm.gtf ../GCF_000146045.2_R64_genomic.fna sce
290
+
291
+ ```
292
+
293
+
294
+ ## 3. 示例
295
+ 为了展示 RiboParser 的分析流程和使用方法,这里使用数据集 GSE67387 的 RNA-seq 和 Ribo-seq 数据做示例。
296
+
297
+ ```shell
298
+ # dataset
299
+ https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE67387
300
+
301
+ # reference
302
+ Nedialkova DD, Leidel SA. Optimization of Codon Translation Rates via tRNA Modifications Maintains Proteome Integrity. Cell 2015 Jun 18;161(7):1606-18.
303
+ PMID: 26052047
304
+ ```
305
+
306
+
307
+ ### 3.1 GSE67387 数据基础分析
308
+
309
+ #### 3.1.1 下载原始数据
310
+ 1. 下载 RNA-seq 数据
311
+ 使用 `sra-tools` 中的 `prefetch` 下载原始的 sra 格式数据,并解压为 fastq 格式文件。
312
+ ```bash
313
+ $ cd /mnt/t64/test/sce/2.rawdata/rna-seq/
314
+
315
+ #################################################
316
+ # download rna-seq
317
+ $ prefetch -o SRR1944925.sra SRR1944925
318
+ $ prefetch -o SRR1944926.sra SRR1944926
319
+ $ prefetch -o SRR1944927.sra SRR1944927
320
+ $ prefetch -o SRR1944928.sra SRR1944928
321
+ $ prefetch -o SRR1944929.sra SRR1944929
322
+ $ prefetch -o SRR1944930.sra SRR1944930
323
+ $ prefetch -o SRR1944931.sra SRR1944931
324
+ $ prefetch -o SRR1944932.sra SRR1944932
325
+ $ prefetch -o SRR1944933.sra SRR1944933
326
+ $ prefetch -o SRR1944934.sra SRR1944934
327
+ $ prefetch -o SRR1944935.sra SRR1944935
328
+
329
+ # decompression
330
+ for sra in *.sra
331
+ do
332
+ fastq-dump $sra
333
+ pigz *fastq
334
+ done
335
+ ```
336
+
337
+ 2. 下载 Ribo-seq 数据
338
+ ```bash
339
+ cd /mnt/t64/test/sce/2.rawdata/ribo-seq/
340
+
341
+ #################################################
342
+ # download ribo-seq
343
+ prefetch -o SRR1944912.sra SRR1944912
344
+ prefetch -o SRR1944913.sra SRR1944913
345
+ prefetch -o SRR1944914.sra SRR1944914
346
+ prefetch -o SRR1944915.sra SRR1944915
347
+ prefetch -o SRR1944916.sra SRR1944916
348
+ prefetch -o SRR1944917.sra SRR1944917
349
+ prefetch -o SRR1944918.sra SRR1944918
350
+ prefetch -o SRR1944919.sra SRR1944919
351
+ prefetch -o SRR1944920.sra SRR1944920
352
+ prefetch -o SRR1944921.sra SRR1944921
353
+ prefetch -o SRR1944922.sra SRR1944922
354
+ prefetch -o SRR1944923.sra SRR1944923
355
+
356
+ # decompression
357
+ for sra in *.sra
358
+ do
359
+ fastq-dump $sra
360
+ pigz *fastq
361
+ done
362
+ ```
363
+
364
+
365
+ #### 3.1.2 数据清洗
366
+ 因为该项目提供的原始数据是清洗后的,所以并不包含接头序列,这里只展示通用步骤。
367
+
368
+ 1. 清洗 RNA-seq 数据
369
+ ```bash
370
+ $ cd /mnt/t64/test/sce/3.rna-seq/1.cleandata/
371
+
372
+ #################################################
373
+ # run the cutadapt
374
+ for fq in /mnt/t64/test/sce/2.rawdata/rna-seq/*fastq.gz
375
+ do
376
+ cutadapt --match-read-wildcards \
377
+ -a AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGC \
378
+ -m 10 -O 6 -j 10 \
379
+ -o `\basename $fq fastq.gz`clean.fastq.gz $fq &> $fq".log"
380
+ done
381
+ ```
382
+
383
+ 2. 清洗 Ribo-seq 数据
384
+ ```bash
385
+ $ cd /mnt/t64/test/sce/4.ribo-seq/1.cleandata/
386
+
387
+ #################################################
388
+ # run the cutadapt
389
+ for fq in /mnt/t64/test/sce/2.rawdata/ribo-seq/*fastq.gz
390
+ do
391
+ cutadapt --match-read-wildcards \
392
+ -a AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGC \
393
+ -m 10 -O 6 -j 10 \
394
+ -o `\basename $fq fastq.gz`clean.fastq.gz $fq &> $fq".log"
395
+ done
396
+ ```
397
+
398
+
399
+ #### 3.1.3 把 clean data 比对到不同类型的参考文件
400
+ 为了确定文库的质量,排除不同 ncRNA 来源的 reads 对后续分析的影响,这里使用 `bowtie` 对数据进行分类。
401
+ 正常情况下,尤其是使用 oligo(dT) 方法构建的 RNA-seq 文库,其中的 reads 大多来源于 mRNA。所以对于 RNA-seq 的分析而言,这个步骤不是必须的。
402
+
403
+ 1. 比对 RNA-seq 数据
404
+ ```bash
405
+ $ cd /mnt/t64/test/sce/3.rna-seq/2.bowtie/
406
+
407
+ #################################################
408
+ # set database
409
+ rrna='/mnt/t64/test/sce/1.reference/rrna/rrna'
410
+ trna='/mnt/t64/test/sce/1.reference/trna/trna'
411
+ ncrna='/mnt/t64/test/sce/1.reference/ncrna/ncrna'
412
+ mrna='/mnt/t64/test/sce/1.reference/mrna/mrna'
413
+ chrom='/mnt/t64/test/sce/1.reference/genome/genome'
414
+
415
+ # alignment reads to reference
416
+ for fq in /mnt/t64/test/sce/3.rna-seq/1.cleandata/*fastq.gz
417
+ do
418
+ fqname=`\basename $fq .fastq.gz`
419
+
420
+ ## rrna
421
+ bowtie -p 10 -v 1 --un="$fqname".norrna.fq --al="$fqname".rrna.fq \
422
+ -x $rrna $fq -S "$fqname".rrna.sam 2>> "$fqname".log
423
+
424
+ ## trna
425
+ bowtie -p 10 -v 1 --un="$fqname".notrna.fq --al="$fqname".trna.fq \
426
+ -x $trna "$fqname".norrna.fq -S "$fqname".trna.sam 2>> "$fqname".log
427
+
428
+ ## ncrna
429
+ bowtie -p 10 -v 1 --un="$fqname".noncrna.fq --al="$fqname".ncrna.fq \
430
+ -x $ncrna "$fqname".notrna.fq -S "$fqname".ncrna.sam 2>> "$fqname".log
431
+
432
+ ## mrna
433
+ bowtie -p 10 -v 1 --un="$fqname".nomrna.fq --al="$fqname".mrna.fq \
434
+ -x $mrna "$fqname".noncrna.fq -S "$fqname".mrna.sam 2>> "$fqname".log
435
+
436
+ ## genome
437
+ bowtie -p 10 -v 1 --un="$fqname".nogenome.fq --al="$fqname".genome.fq \
438
+ -x $chrom "$fqname".nomrna.fq -S "$fqname".genome.sam 2>> "$fqname".log
439
+
440
+ ## compress fastq
441
+ pigz *fq
442
+
443
+ ## compress sam
444
+ for sam in *.sam
445
+ do
446
+ samtools view -h -F 4 $sam | samtools sort -@ $threads -o `\basename $sam sam`bam
447
+ rm $sam
448
+ done
449
+
450
+ done
451
+ ```
452
+
453
+ 2. 统计所有数据库的比对结果
454
+ ```bash
455
+ #################################################
456
+ # merge all log files
457
+ merge_bwt_log \
458
+ -n rRNA,tRNA,ncRNA,mRNA,Genome \
459
+ -l *log -o sce
460
+
461
+ ```
462
+
463
+ 3. 比对 Ribo-seq 数据
464
+ ```bash
465
+ $ cd /mnt/t64/test/sce/4.ribo-seq/2.bowtie/
466
+
467
+ #################################################
468
+ # set database
469
+ rrna='/mnt/t64/test/sce/1.reference/rrna/rrna'
470
+ trna='/mnt/t64/test/sce/1.reference/trna/trna'
471
+ ncrna='/mnt/t64/test/sce/1.reference/ncrna/ncrna'
472
+ mrna='/mnt/t64/test/sce/1.reference/mrna/mrna'
473
+ chrom='/mnt/t64/test/sce/1.reference/genome/genome'
474
+
475
+ # alignment reads to reference
476
+ for fq in /mnt/t64/test/sce/4.ribo-seq/1.cleandata/*fastq.gz
477
+ do
478
+ fqname=`\basename $fq .fastq.gz`
479
+
480
+ ## rrna
481
+ bowtie -p 10 -v 1 --un="$fqname".norrna.fq --al="$fqname".rrna.fq \
482
+ -x $rrna $fq -S "$fqname".rrna.sam 2>> "$fqname".log
483
+
484
+ ## trna
485
+ bowtie -p 10 -v 1 --un="$fqname".notrna.fq --al="$fqname".trna.fq \
486
+ -x $trna "$fqname".norrna.fq -S "$fqname".trna.sam 2>> "$fqname".log
487
+
488
+ ## ncrna
489
+ bowtie -p 10 -v 1 --un="$fqname".noncrna.fq --al="$fqname".ncrna.fq \
490
+ -x $ncrna "$fqname".notrna.fq -S "$fqname".ncrna.sam 2>> "$fqname".log
491
+
492
+ ## mrna
493
+ bowtie -p 10 -v 1 --un="$fqname".nomrna.fq --al="$fqname".mrna.fq \
494
+ -x $mrna "$fqname".noncrna.fq -S "$fqname".mrna.sam 2>> "$fqname".log
495
+
496
+ ## genome
497
+ bowtie -p 10 -v 1 --un="$fqname".nogenome.fq --al="$fqname".genome.fq \
498
+ -x $chrom "$fqname".nomrna.fq -S "$fqname".genome.sam 2>> "$fqname".log
499
+
500
+ ## compress fastq
501
+ pigz *fq
502
+
503
+ ## compress sam
504
+ for sam in *.sam
505
+ do
506
+ samtools view -h -F 4 $sam | samtools sort -@ $threads -o `\basename $sam sam`bam
507
+ rm $sam
508
+ done
509
+
510
+ done
511
+
512
+ ```
513
+
514
+ 4. 统计所有数据库的比对结果
515
+ ```bash
516
+ #################################################
517
+ # merge all log files
518
+ merge_bwt_log \
519
+ -n rRNA,tRNA,ncRNA,mRNA,Genome \
520
+ -l *log -o sce
521
+
522
+ ```
523
+
524
+
525
+ #### 3.1.4 使用 STAR 比对 mRNA 的 reads
526
+ 去除掉 ncRNA 的 reads 之后,使用 star 重新比对到酵母的基因组。
527
+
528
+ 1. 使用 star 比对 RNA-seq 的数据
529
+ ```bash
530
+ cd /mnt/t64/test/sce/3.rna-seq/3.star/
531
+
532
+ #################################################
533
+ # set the option and database
534
+ genome='/mnt/t64/test/sce/1.reference/star-index/'
535
+
536
+ #################################################
537
+ # map the all rna-seq reads to genome and transcriptome region
538
+ for fastq in /mnt/t64/test/sce/3.rna-seq/2.bowtie/*.noncrna.fq.gz
539
+ do
540
+
541
+ ## get file name
542
+ output=$(basename $fastq .noncrna.fq.gz)
543
+
544
+ #################################################
545
+ ## run the alignment
546
+ STAR --runThreadN 10 \
547
+ --readFilesCommand zcat \
548
+ --genomeDir $genome \
549
+ --readFilesIn $fastq \
550
+ --outFileNamePrefix $output \
551
+ --outSAMtype BAM Unsorted \
552
+ --outFilterType BySJout \
553
+ --quantMode TranscriptomeSAM GeneCounts \
554
+ --outReadsUnmapped Fastx \
555
+ --outSAMattributes All \
556
+ --alignEndsType Local \
557
+ --outFilterMultimapNmax 3 \
558
+ --outFilterMismatchNmax 1 \
559
+ --alignIntronMax 10000 \
560
+ --outFilterMatchNmin 20
561
+ # --outWigType wiggle --outWigNorm RPM
562
+
563
+ pigz *mate1
564
+
565
+ #################################################
566
+ ## sort the bam file
567
+ samtools sort -@ 10 $output"Aligned.out.bam" -o $output"Aligned.sortedByCoord.out.bam"
568
+ samtools index -@ 10 $output"Aligned.sortedByCoord.out.bam"
569
+ rm $output"Aligned.out.bam"
570
+
571
+ done
572
+ ```
573
+
574
+ 2. 使用 star 比对 Ribo-seq 的数据
575
+ ```bash
576
+ cd /mnt/t64/test/sce/4.ribo-seq/3.star/
577
+
578
+ #################################################
579
+ # set the option and database
580
+ genome='/mnt/t64/test/sce/1.reference/star-index/'
581
+
582
+ #################################################
583
+ # map all the ribo-seq reads to genome and transcriptome region
584
+ for fastq in /mnt/t64/test/sce/4.ribo-seq/2.bowtie/*.noncrna.fq.gz
585
+ do
586
+
587
+ ## get file name
588
+ output=$(basename $fastq .noncrna.fq.gz)
589
+
590
+ #################################################
591
+ ## run the alignment
592
+ STAR --runThreadN 10 \
593
+ --readFilesCommand zcat \
594
+ --genomeDir $genome \
595
+ --readFilesIn $fastq \
596
+ --outFileNamePrefix $output \
597
+ --outSAMtype BAM Unsorted \
598
+ --outFilterType BySJout \
599
+ --quantMode TranscriptomeSAM GeneCounts \
600
+ --outReadsUnmapped Fastx \
601
+ --outSAMattributes All \
602
+ --alignEndsType Local \
603
+ --outFilterMultimapNmax 3 \
604
+ --outFilterMismatchNmax 1 \
605
+ --alignIntronMax 10000 \
606
+ --outFilterMatchNmin 20
607
+ # --outWigType wiggle --outWigNorm RPM
608
+
609
+ pigz *mate1
610
+
611
+ #################################################
612
+ ## sort the bam file
613
+ samtools sort -@ 10 $output"Aligned.out.bam" -o $output"Aligned.sortedByCoord.out.bam"
614
+ samtools index -@ 10 $output"Aligned.sortedByCoord.out.bam"
615
+ rm $output"Aligned.out.bam"
616
+
617
+ done
618
+ ```
619
+
620
+
621
+ #### 3.1.5 使用 RSEM 或者 featureCounts 定量基因表达水平
622
+ 我们可以使用 RSEM 或者 featureCounts 来对基因的表达水平进行定量,二者各有特色,这里使用 RSEM 做示例。
623
+
624
+ 1. 定量 RNA-seq 的转录水平
625
+ ```bash
626
+ $ cd /mnt/t64/test/sce/3.rna-seq/4.quantification/
627
+
628
+ #################################################
629
+ # quantify the gene expression
630
+ for bam in /mnt/t64/test/sce/3.rna-seq/3.star/*Aligned.toTranscriptome.out.bam
631
+ do
632
+ rsem-calculate-expression -p 10 --no-bam-output --alignments -q $bam /mnt/t64/test/sce/1.reference/rsem-index/sce `\basename $bam Aligned.toTranscriptome.out.bam`
633
+ # rsem-calculate-expression -p 10 --paired-end --no-bam-output --alignments -q $bam /mnt/t64/test/sce/1.reference/rsem-index/sce `\basename $bam Aligned.toTranscriptome.out.bam`
634
+ done
635
+ ```
636
+
637
+ 2. 合并 RNA-seq 的数据定量结果
638
+ ```bash
639
+ #################################################
640
+ # merge the gene expression
641
+ merge_rsem -c expected_count -l *.genes.results -o gene.expected_count.txt
642
+ merge_rsem -c TPM -l *.genes.results -o gene.TPM.txt
643
+ merge_rsem -c FPKM -l *.genes.results -o gene.FPKM.txt
644
+
645
+ #################################################
646
+ # merge the isoforms expression
647
+ merge_rsem -c expected_count -l *.isoforms.results -o isoforms.expected_count.txt
648
+ merge_rsem -c TPM -l *.isoforms.results -o isoforms.TPM.txt
649
+ merge_rsem -c FPKM -l *.isoforms.results -o isoforms.FPKM.txt
650
+
651
+ ```
652
+
653
+
654
+ 3. 定量 Ribo-seq 的翻译水平
655
+ ```bash
656
+ $ cd /mnt/t64/test/sce/4.ribo-seq/4.quantification/
657
+
658
+ #################################################
659
+ # quantify the isoforms expression
660
+ for bam in /mnt/t64/test/sce/4.ribo-seq/3.star/*Aligned.toTranscriptome.out.bam
661
+ do
662
+ rsem-calculate-expression -p 10 --no-bam-output --alignments -q $bam /mnt/t64/test/sce/1.reference/rsem-index/sce `\basename $bam Aligned.toTranscriptome.out.bam`
663
+ # rsem-calculate-expression -p 10 --paired-end --no-bam-output --alignments -q $bam /mnt/t64/test/sce/1.reference/rsem-index/sce `\basename $bam Aligned.toTranscriptome.out.bam`
664
+ done
665
+ ```
666
+
667
+ 4. 合并 Ribo-seq 的数据定量结果
668
+ ```bash
669
+ #################################################
670
+ # merge the gene expression
671
+ merge_rsem -c expected_count -l *.genes.results -o gene.expected_count.txt
672
+ merge_rsem -c TPM -l *.genes.results -o gene.TPM.txt
673
+ merge_rsem -c FPKM -l *.genes.results -o gene.FPKM.txt
674
+
675
+ #################################################
676
+ # merge the isoforms expression
677
+ merge_rsem -c expected_count -l *.isoforms.results -o isoforms.expected_count.txt
678
+ merge_rsem -c TPM -l *.isoforms.results -o isoforms.TPM.txt
679
+ merge_rsem -c FPKM -l *.isoforms.results -o isoforms.FPKM.txt
680
+
681
+ ```
682
+
683
+
684
+ ### 3.2 使用 RiboParser 继续完成 GSE67387 的数据分析
685
+ #### 3.2.1 测序数据的质量检查
686
+ 1. 检查 Ribo-seq 数据的测序质量
687
+ ```bash
688
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/01.qc/
689
+
690
+ #################################################
691
+ # check the ribo-seq quality
692
+ for bam in /mnt/t64/test/sce/4.ribo-seq/3.star/*Aligned.toTranscriptome.out.bam
693
+ do
694
+ prefix_name=$(basename $bam Aligned.toTranscriptome.out.bam)
695
+
696
+ rpf_Check -b $bam -s --thread 10 -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
697
+ -o $prefix_name &> $prefix_name".log"
698
+
699
+ done
700
+ ```
701
+
702
+ 2. 合并所有样本的质量分析结果
703
+ ```bash
704
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/
705
+
706
+ #################################################
707
+ # merge the ribo-seq quality results
708
+ merge_length -l ./01.qc/*length_distribution.txt -o sce
709
+ merge_saturation -l ./01.qc/*gene_saturation.txt -o sce
710
+
711
+ ```
712
+
713
+
714
+ 3. 检查 RNA-seq 数据的测序质量
715
+ ```bash
716
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/01.qc/
717
+
718
+ #################################################
719
+ # check the rna-seq quality
720
+ for bam in /mnt/t64/test/sce/3.rna-seq/3.star/*Aligned.toTranscriptome.out.bam
721
+ do
722
+ prefix_name=$(basename $bam Aligned.toTranscriptome.out.bam)
723
+
724
+ rpf_Check -b $bam -s --thread 10 -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
725
+ -o $prefix_name &> $prefix_name".log"
726
+
727
+ done
728
+ ```
729
+
730
+ 4. 合并所有样本的质量分析结果
731
+ ```bash
732
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/
733
+
734
+ #################################################
735
+ # merge the rna-seq quality results
736
+ merge_length -l ./01.qc/*length_distribution.txt -o sce
737
+ merge_saturation -l ./01.qc/*gene_saturation.txt -o sce
738
+
739
+ ```
740
+
741
+
742
+ #### 3.2.2 测序数据的酶切和酶连的偏好性
743
+ 1. 检查 Ribo-seq 数据的酶切和酶连的偏好性
744
+ ```bash
745
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/02.digestion/
746
+
747
+ #################################################
748
+ # check the reads digestion
749
+ for bam in /mnt/t64/test/sce/4.ribo-seq/3.star/01.qc/*.bam
750
+ do
751
+ prefix_name=$(basename $bam .bam)
752
+
753
+ rpf_Digest -b $bam -m 27 -M 33 --scale \
754
+ -s /mnt/t64/test/sce/1.reference/norm/sce.norm.rna.fa \
755
+ -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
756
+ -o $prefix_name &> $prefix_name".log"
757
+
758
+ done
759
+ ```
760
+
761
+ 2. 合并所有样本的 reads digestion
762
+ ```bash
763
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/
764
+
765
+ #################################################
766
+ # merge the rpf digestion
767
+ merge_digestion -l ./02.digestion/*pwm.txt -o sce
768
+
769
+ ```
770
+
771
+
772
+ 3. 检查 RNA-seq 数据的酶切和酶连的偏好性
773
+ ```bash
774
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/02.digestion/
775
+
776
+ #################################################
777
+ # check the reads digestion
778
+ for bam in /mnt/t64/test/sce/3.rna-seq/3.star/01.qc/*.bam
779
+ do
780
+ prefix_name=$(basename $bam .bam)
781
+
782
+ rpf_Digest -b $bam -m 25 -M 50 --scale \
783
+ -s /mnt/t64/test/sce/1.reference/norm/sce.norm.rna.fa \
784
+ -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
785
+ -o $prefix_name &> $prefix_name".log"
786
+
787
+ done
788
+ ```
789
+
790
+ 4. 合并所有样本的 reads digestion
791
+ ```bash
792
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/
793
+
794
+ #################################################
795
+ # merge the RNA-seq reads digestion
796
+ merge_digestion -l ./02.digestion/*pwm.txt -o sce
797
+
798
+ ```
799
+
800
+
801
+ #### 3.2.3 使用 RiboParser 做质量检查
802
+ 1. 预测 Ribo-seq 中的最佳 offset
803
+ ```bash
804
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/03.offset/
805
+
806
+ #################################################
807
+ # predict the offset table
808
+ for bam in /mnt/t64/test/sce/4.ribo-seq/3.star/01.qc/*.bam
809
+ do
810
+ prefix_name=$(basename $bam .bam)
811
+
812
+ rpf_Offset -b $bam -m 27 -M 33 -p 30 -d \
813
+ --mode RSBM \
814
+ -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
815
+ -o $prefix_name &> $prefix_name".log"
816
+
817
+ done
818
+ ```
819
+
820
+ 2. 合并所有样本的 offset 预测结果
821
+ ```bash
822
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/
823
+
824
+ #################################################
825
+ # merge the ribo-seq offset results
826
+ merge_offset_detail -l ./03.offset/*end.txt -o sce
827
+ merge_offset -l ./03.offset/*sscbm_offset.txt -o sce_sscbm
828
+ merge_offset -l ./03.offset/*rsbm_offset.txt -o sce_rsbm
829
+
830
+ ```
831
+
832
+ 3. RNA-seq 无需预测 offset,这里直接创建一个文件,其中 offset 值均为 12。
833
+ ```bash
834
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/03.offset/
835
+
836
+ #################################################
837
+ # set the offset table
838
+ for bam in /mnt/t64/test/sce/3.rna-seq/3.star/01.qc/*.bam
839
+ do
840
+
841
+ prefix_name=$(basename $bam .bam)
842
+ rna_Offset -m 27 -M 50 -e 12 -o $prefix_name &> $prefix_name".log"
843
+
844
+ done
845
+ ```
846
+
847
+
848
+ #### 3.2.4 把 bam 文件中的 reads 转换为 txt 文件中的 density。
849
+ 1. 转换 Ribo-seq 数据
850
+ ```bash
851
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/04.density/
852
+
853
+ #################################################
854
+ # convert the rpf to density
855
+ for bam in /mnt/t64/test/sce/4.ribo-seq/3.star/01.qc/*.bam
856
+ do
857
+ prefix_name=$(basename $bam .bam)
858
+
859
+ rpf_Density -b $bam -m 27 -M 33 --period 40 -l --thread 10 \
860
+ -p /mnt/t64/test/sce/4.ribo-seq/5.riboparser/03.offset/$prefix_name"_rsbm_offset.txt" \
861
+ -s /mnt/t64/test/sce/1.reference/norm/sce.norm.rna.fa \
862
+ -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
863
+ -o $prefix_name &> $prefix_name".log"
864
+
865
+ done
866
+
867
+ ```
868
+
869
+ 2. 转换 RNA-seq 数据
870
+ ```bash
871
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/04.density/
872
+
873
+ #################################################
874
+ # convert the reads to density
875
+ for bam in /mnt/t64/test/sce/3.rna-seq/3.star/01.qc/*.bam
876
+ do
877
+ prefix_name=$(basename $bam .bam)
878
+
879
+ rna_Density -b $bam -m 27 -M 33 --period 40 -l --thread 10 \
880
+ -p /mnt/t64/test/sce/3.rna-seq/5.riboparser/03.offset/$prefix_name"_offset.txt" \
881
+ -s /mnt/t64/test/sce/1.reference/norm/sce.norm.rna.fa \
882
+ -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
883
+ -o $prefix_name &> $prefix_name".log"
884
+
885
+ done
886
+
887
+ ```
888
+
889
+
890
+ #### 3.2.5 合并所有文件
891
+ 1. 合并 Ribo-seq density 文件
892
+ ```bash
893
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/
894
+
895
+ #################################################
896
+ # create the samples file: RPF.file.list
897
+ merge_dst_list -l ../04.density/*_rpf.txt -o RPF.file.list
898
+
899
+
900
+ cat RPF.file.list
901
+
902
+ Name File Type
903
+ wt_ribo_YPD1 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944912_rpf.txt Ribo
904
+ wt_ribo_YPD2 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944913_rpf.txt Ribo
905
+ wt_ribo_YPD3 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944914_rpf.txt Ribo
906
+ ncs2d_ribo_YPD1 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944915_rpf.txt Ribo
907
+ ncs2d_ribo_YPD2 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944916_rpf.txt Ribo
908
+ ncs2d_ribo_YPD3 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944917_rpf.txt Ribo
909
+ elp6d_ribo_YPD1 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944918_rpf.txt Ribo
910
+ elp6d_ribo_YPD2 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944919_rpf.txt Ribo
911
+ elp6d_ribo_YPD3 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944920_rpf.txt Ribo
912
+ ncs2d_elp6d_ribo_YPD1 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944921_rpf.txt Ribo
913
+ ncs2d_elp6d_ribo_YPD2 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944922_rpf.txt Ribo
914
+ ncs2d_elp6d_ribo_YPD3 /mnt/t64/test/sce/4.ribo-seq/04.density/SRR1944923_rpf.txt Ribo
915
+
916
+ #################################################
917
+ # merge all the Ribo-seq files
918
+ rpf_Merge -l RPF.file.list -o sce_rpf &> sce.log
919
+
920
+ ```
921
+
922
+ 2. 合并 RNA-seq density 文件
923
+ ```bash
924
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/
925
+
926
+ #################################################
927
+ # create the samples file: RNA.file.list
928
+ merge_dst_list -l ../04.density/*_rna.txt -o RNA.file.list
929
+
930
+ cat RNA.file.list
931
+
932
+ Name File Type
933
+ wt_rna_YPD1 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944912_rna.txt RNA
934
+ wt_rna_YPD2 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944913_rna.txt RNA
935
+ wt_rna_YPD3 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944914_rna.txt RNA
936
+ ncs2d_rna_YPD1 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944915_rna.txt RNA
937
+ ncs2d_rna_YPD2 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944916_rna.txt RNA
938
+ ncs2d_rna_YPD3 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944917_rna.txt RNA
939
+ elp6d_rna_YPD1 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944918_rna.txt RNA
940
+ elp6d_rna_YPD2 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944919_rna.txt RNA
941
+ elp6d_rna_YPD3 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944920_rna.txt RNA
942
+ ncs2d_elp6d_rna_YPD1 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944921_rna.txt RNA
943
+ ncs2d_elp6d_rna_YPD2 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944922_rna.txt RNA
944
+ ncs2d_elp6d_rna_YPD3 /mnt/t64/test/sce/3.rna-seq/04.density/SRR1944923_rna.txt RNA
945
+
946
+ #################################################
947
+ # merge all the RNA-seq files
948
+ rpf_Merge -l RNA.file.list -o sce_rna &> sce.log
949
+
950
+ ```
951
+
952
+
953
+ #### 3.2.6 计算三核苷酸周期性
954
+ 1. 检查 Ribo-seq 数据三核苷酸周期性
955
+ ```bash
956
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/06.periodicity/
957
+
958
+ #################################################
959
+ # check the periodicity
960
+ rpf_Periodicity \
961
+ -r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
962
+ -m 30 --tis 0 --tts 0 -o sce &> sce.log
963
+
964
+ ```
965
+
966
+ 2. 检查 RNA-seq 数据三核苷酸周期性
967
+ ```bash
968
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/06.periodicity/
969
+
970
+ #################################################
971
+ # check the periodicity
972
+ rpf_Periodicity \
973
+ -r /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
974
+ -m 30 --tis 0 --tts 0 -o sce &> sce.log
975
+
976
+ ```
977
+
978
+
979
+ #### 3.2.7 起始和终止密码子前后的 meta-gene 分析
980
+ 1. Ribo-seq 数据 meta-gene 分析
981
+ ```bash
982
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/07.metaplot/
983
+
984
+ #################################################
985
+ # metagene analysis
986
+ rpf_Metaplot \
987
+ -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
988
+ -r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
989
+ -m 50 --mode bar -o sce &> sce.log
990
+
991
+ ```
992
+
993
+ 2. RNA-seq 数据 meta-gene 分析
994
+ ```bash
995
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/07.metaplot/
996
+
997
+ #################################################
998
+ # metagene analysis
999
+ rpf_Metaplot \
1000
+ -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
1001
+ -r /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
1002
+ -m 50 --mode bar -o sce &> sce.log
1003
+
1004
+ ```
1005
+
1006
+
1007
+ #### 3.2.8 检查基因上的整体 density 覆盖情况
1008
+ 1. 检查 Ribo-seq 数据的 density 覆盖
1009
+ ```bash
1010
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/08.coverage/
1011
+
1012
+ #################################################
1013
+ # check the rpf density along with the gene body
1014
+ rpf_Coverage \
1015
+ -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
1016
+ -r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
1017
+ -m 50 --outlier \
1018
+ -b 10,100,10 \
1019
+ -n --heat \
1020
+ -o sce &> sce.log
1021
+
1022
+ ```
1023
+
1024
+ 2. 检查 RNA-seq 数据的 density 覆盖
1025
+ ```bash
1026
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/08.coverage/
1027
+
1028
+ #################################################
1029
+ # check the reads density along with the gene body
1030
+ rpf_Coverage \
1031
+ -t /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
1032
+ -r /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
1033
+ -m 50 --outlier \
1034
+ -b 10,100,10 \
1035
+ -n --heat \
1036
+ -o sce &> sce.log
1037
+
1038
+ ```
1039
+
1040
+
1041
+ #### 3.2.9 检查样本之间的重复性
1042
+ 1. 检查 Ribo-seq 数据样本重复性
1043
+ ```bash
1044
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/09.correlation/
1045
+
1046
+ #################################################
1047
+ # calculate the samples replication of Ribo-seq
1048
+ rpf_Corr \
1049
+ -r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
1050
+ -o sce &> sce.log
1051
+
1052
+ ```
1053
+
1054
+ 2. 检查 RNA-seq 数据的重复性
1055
+ ```bash
1056
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/09.correlation/
1057
+
1058
+ #################################################
1059
+ # calculate the samples replication of RNA-seq
1060
+ rpf_Corr \
1061
+ -r /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
1062
+ -o sce &> sce.log
1063
+
1064
+ ```
1065
+
1066
+
1067
+ #### 3.2.10 基因表达和翻译水平定量
1068
+ 1. 计算基因的翻译量(RPFs level)
1069
+ ```bash
1070
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/10.quantification/
1071
+
1072
+ #################################################
1073
+ # quantify the gene expression
1074
+ rpf_Quant \
1075
+ -r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
1076
+ --tis 15 \
1077
+ --tts 5 \
1078
+ -o sce &> sce.log
1079
+
1080
+ ```
1081
+
1082
+
1083
+ #### 3.2.11 计算密码子水平的 pausing score
1084
+ 1. 计算 Ribo-seq 数据中密码子水平的 pausing score
1085
+ ```bash
1086
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/11.pausing_score/
1087
+
1088
+ #################################################
1089
+ # calculate the codon pausing score of E/P/A site
1090
+ for sites in E P A
1091
+ do
1092
+ rpf_Pausing \
1093
+ -l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
1094
+ -r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
1095
+ -b 0 --stop \
1096
+ -m 30 \
1097
+ -s $sites \
1098
+ -f 0 \
1099
+ --scale minmax \
1100
+ -o "$sites"_site &> "$sites"_site.log
1101
+ done
1102
+
1103
+ ```
1104
+
1105
+
1106
+ #### 3.2.12 计算密码子水平的 occupancy
1107
+ 1. 计算 Ribo-seq 数据中密码子水平的 occupancy
1108
+ ```bash
1109
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/12.codon_occupancy/
1110
+
1111
+ #################################################
1112
+ # calculate the codon occupancy of E/P/A site
1113
+ for sites in E P A
1114
+ do
1115
+ rpf_Occupancy \
1116
+ -l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
1117
+ -r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
1118
+ -m 30 \
1119
+ -s "$sites" \
1120
+ -f 0 --stop \
1121
+ --scale minmax \
1122
+ -o "$sites"_site &> "$sites"_site.log
1123
+ done
1124
+
1125
+ ```
1126
+
1127
+
1128
+ #### 3.2.13 计算密码子水平的 decoding time
1129
+ 1. 计算 Ribo-seq 数据中密码子水平的 decoding time
1130
+ ```bash
1131
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/13.codon_decoding_time/
1132
+
1133
+ #################################################
1134
+ # calculate the codon decoding time of E/P/A site
1135
+ for sites in E P A
1136
+ do
1137
+ rpf_CDT \
1138
+ -l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
1139
+ --rna /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
1140
+ --rpf /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
1141
+ --stop \
1142
+ -m 50 \
1143
+ -f 0 \
1144
+ -s $sites \
1145
+ --tis 10 \
1146
+ --tts 5 \
1147
+ -o "$sites"_site &> "$sites"_site.log
1148
+ done
1149
+
1150
+ ```
1151
+
1152
+
1153
+ #### 3.2.14 计算密码子水平的 selection time
1154
+ 1. 计算 Ribo-seq 数据中密码子水平的 selection time
1155
+ ```bash
1156
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/14.codon_selection_time/
1157
+
1158
+ #################################################
1159
+ # calculate the codon selection time of E/P/A site
1160
+ for sites in E P A
1161
+ do
1162
+ rpf_CST \
1163
+ -l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
1164
+ --rna /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
1165
+ --rpf /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
1166
+ --stop \
1167
+ -m 50 \
1168
+ -f 0 \
1169
+ -s $sites \
1170
+ --tis 10 \
1171
+ --tts 5 \
1172
+ -o "$sites"_site &> "$sites"_site.log
1173
+ done
1174
+
1175
+ ```
1176
+
1177
+
1178
+ #### 3.2.15 计算基因和密码子水平的变异系数
1179
+ 1. 计算 Ribo-seq 数据中基因和密码子水平的变异系数
1180
+ ```bash
1181
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/15.coefficient_of_variation/
1182
+
1183
+ #################################################
1184
+ # Here we can configure the design file to calculate differences between different groups.
1185
+ $ cat design.txt
1186
+ name group
1187
+ WT_ribo_YPD1 WT_ribo_YPD
1188
+ WT_ribo_YPD2 WT_ribo_YPD
1189
+ WT_ribo_YPD3 WT_ribo_YPD
1190
+ ncs2d_ribo_YPD1 ncs2d_ribo_YPD
1191
+ ncs2d_ribo_YPD2 ncs2d_ribo_YPD
1192
+ ncs2d_ribo_YPD3 ncs2d_ribo_YPD
1193
+ elp6d_ribo_YPD1 elp6d_ribo_YPD
1194
+ elp6d_ribo_YPD2 elp6d_ribo_YPD
1195
+ elp6d_ribo_YPD3 elp6d_ribo_YPD
1196
+ ncs2d_elp6d_ribo_YPD1 ncs2d_elp6d_ribo_YPD
1197
+ ncs2d_elp6d_ribo_YPD2 ncs2d_elp6d_ribo_YPD
1198
+ ncs2d_elp6d_ribo_YPD3 ncs2d_elp6d_ribo_YPD
1199
+
1200
+ #################################################
1201
+ # calculate the coefficient of variation
1202
+ rpf_CoV \
1203
+ -l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
1204
+ -r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
1205
+ -f 0 \
1206
+ -m 30 \
1207
+ --tis 10 \
1208
+ --tts 5 \
1209
+ --fig \
1210
+ -g design.txt \
1211
+ -o sce &> sce.log
1212
+
1213
+ ```
1214
+
1215
+ #### 3.2.16 密码子 meta-codon 分析
1216
+ 1. 计算 Ribo-seq 数据中密码子 meta density
1217
+ ```bash
1218
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/16.meta_codon/
1219
+
1220
+ #################################################
1221
+ # Here we can configure the codon list.
1222
+ $ cat codon_list.txt
1223
+ AAA
1224
+ AAC
1225
+ AAG
1226
+ AAT
1227
+ AAGAAG
1228
+ ATGATG
1229
+ CCCGGG
1230
+ ...
1231
+
1232
+
1233
+ #################################################
1234
+ # codon meta analysis
1235
+ rpf_Meta_Codon \
1236
+ -r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
1237
+ -m 50 -f 0 \
1238
+ -c codon_list.txt \
1239
+ -a 15 -u -n --fig \
1240
+ -o sce &> sce.log
1241
+
1242
+ ```
1243
+
1244
+ #### 3.2.17 Data shuffling
1245
+ 1. 重新洗牌 Ribo-seq 数据的 gene density 文件
1246
+ ```bash
1247
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/17.shuffle/
1248
+
1249
+ #################################################
1250
+ # shuffle the gene density
1251
+ rpf_Shuffle \
1252
+ -l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
1253
+ -r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
1254
+ -s 0 \
1255
+ -i \
1256
+ -o sce &> sce.log
1257
+
1258
+ ```
1259
+
1260
+ 2. 重新洗牌 RNA-seq 数据的 gene density 文件
1261
+ ```bash
1262
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/11.shuffle/
1263
+
1264
+ #################################################
1265
+ # shuffle the gene density
1266
+ rpf_Shuffle \
1267
+ -l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
1268
+ -r /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
1269
+ -s 0 \
1270
+ -i \
1271
+ -o sce &> sce.log
1272
+
1273
+ ```
1274
+
1275
+ #### 3.2.18 提取 gene density
1276
+ 1. 提取和格式化 Ribo-seq 数据中的 gene density
1277
+ ```bash
1278
+ $ cd /mnt/t64/test/sce/4.ribo-seq/5.riboparser/18.gene_density/
1279
+
1280
+ #################################################
1281
+ # retrieve and format the gene density
1282
+ rpf_Retrieve \
1283
+ -l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
1284
+ -r /mnt/t64/test/sce/4.ribo-seq/5.riboparser/05.merge/sce_rpf_merged.txt \
1285
+ -m 0 \
1286
+ -f \
1287
+ -n \
1288
+ -o sce &> sce.log
1289
+
1290
+ ```
1291
+
1292
+ 2. 提取和格式化 RNA-seq 数据中的 gene density
1293
+ ```bash
1294
+ $ cd /mnt/t64/test/sce/3.rna-seq/5.riboparser/12.gene_density/
1295
+
1296
+ #################################################
1297
+ # retrieve and format the gene density
1298
+ rpf_Retrieve \
1299
+ -l /mnt/t64/test/sce/1.reference/norm/sce.norm.txt \
1300
+ -r /mnt/t64/test/sce/3.rna-seq/5.riboparser/05.merge/sce_rna_merged.txt \
1301
+ -m 0 \
1302
+ -f \
1303
+ -n \
1304
+ -o sce &> sce.log
1305
+
1306
+ ```
1307
+
1308
+
1309
+ ## 4. 贡献
1310
+
1311
+ 欢迎提交问题和贡献代码
1312
+ 联系 rensc0718@163.com
1313
+
1314
+ ## 5. 许可证
1315
+
1316
+ 本项目可免费用于学术研究,不得用于商业用途。