miga-base 0.2.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +351 -0
- data/actions/add_result +61 -0
- data/actions/add_taxonomy +86 -0
- data/actions/create_dataset +62 -0
- data/actions/create_project +70 -0
- data/actions/daemon +69 -0
- data/actions/download_dataset +77 -0
- data/actions/find_datasets +63 -0
- data/actions/import_datasets +86 -0
- data/actions/index_taxonomy +71 -0
- data/actions/list_datasets +83 -0
- data/actions/list_files +67 -0
- data/actions/unlink_dataset +52 -0
- data/bin/miga +48 -0
- data/lib/miga/daemon.rb +178 -0
- data/lib/miga/dataset.rb +286 -0
- data/lib/miga/gui.rb +289 -0
- data/lib/miga/metadata.rb +74 -0
- data/lib/miga/project.rb +268 -0
- data/lib/miga/remote_dataset.rb +154 -0
- data/lib/miga/result.rb +102 -0
- data/lib/miga/tax_index.rb +70 -0
- data/lib/miga/taxonomy.rb +107 -0
- data/lib/miga.rb +83 -0
- data/scripts/_distances_noref_nomulti.bash +86 -0
- data/scripts/_distances_ref_nomulti.bash +105 -0
- data/scripts/aai_distances.bash +40 -0
- data/scripts/ani_distances.bash +39 -0
- data/scripts/assembly.bash +38 -0
- data/scripts/cds.bash +45 -0
- data/scripts/clade_finding.bash +27 -0
- data/scripts/distances.bash +30 -0
- data/scripts/essential_genes.bash +29 -0
- data/scripts/haai_distances.bash +39 -0
- data/scripts/init.bash +211 -0
- data/scripts/miga.bash +12 -0
- data/scripts/mytaxa.bash +93 -0
- data/scripts/mytaxa_scan.bash +85 -0
- data/scripts/ogs.bash +36 -0
- data/scripts/read_quality.bash +37 -0
- data/scripts/ssu.bash +35 -0
- data/scripts/subclades.bash +26 -0
- data/scripts/trimmed_fasta.bash +47 -0
- data/scripts/trimmed_reads.bash +57 -0
- data/utils/adapters.fa +302 -0
- data/utils/mytaxa_scan.R +89 -0
- data/utils/mytaxa_scan.rb +58 -0
- data/utils/requirements.txt +19 -0
- data/utils/subclades-compile.rb +48 -0
- data/utils/subclades.R +171 -0
- metadata +185 -0
data/utils/adapters.fa
ADDED
@@ -0,0 +1,302 @@
|
|
1
|
+
>Illumina_Single_End_Apapter_1
|
2
|
+
ACACTCTTTCCCTACACGACGCTGTTCCATCT
|
3
|
+
>Illumina_Single_End_Apapter_2
|
4
|
+
CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
|
5
|
+
>Illumina_Single_End_PCR_Primer_1
|
6
|
+
AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
|
7
|
+
>Illumina_Single_End_PCR_Primer_2
|
8
|
+
CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
|
9
|
+
>Illumina_Single_End_Sequencing_Primer
|
10
|
+
ACACTCTTTCCCTACACGACGCTCTTCCGATCT
|
11
|
+
|
12
|
+
>Illumina_Paired_End_Adapter_1
|
13
|
+
ACACTCTTTCCCTACACGACGCTCTTCCGATCT
|
14
|
+
>Illumina_Paired_End_Adapter_2
|
15
|
+
CTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
|
16
|
+
>Illumina_Paried_End_PCR_Primer_1
|
17
|
+
AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
|
18
|
+
>Illumina_Paired_End_PCR_Primer_2
|
19
|
+
CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
|
20
|
+
>Illumina_Paried_End_Sequencing_Primer_1
|
21
|
+
ACACTCTTTCCCTACACGACGCTCTTCCGATCT
|
22
|
+
>Illumina_Paired_End_Sequencing_Primer_2
|
23
|
+
CGGTCTCGGCATTCCTACTGAACCGCTCTTCCGATCT
|
24
|
+
|
25
|
+
>Illumina_DpnII_expression_Adapter_1
|
26
|
+
ACAGGTTCAGAGTTCTACAGTCCGAC
|
27
|
+
>Illumina_DpnII_expression_Adapter_2
|
28
|
+
CAAGCAGAAGACGGCATACGA
|
29
|
+
>Illumina_DpnII_expression_PCR_Primer_1
|
30
|
+
CAAGCAGAAGACGGCATACGA
|
31
|
+
>Illumina_DpnII_expression_PCR_Primer_2
|
32
|
+
AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
|
33
|
+
>Illumina_DpnII_expression_Sequencing_Primer
|
34
|
+
CGACAGGTTCAGAGTTCTACAGTCCGACGATC
|
35
|
+
|
36
|
+
>Illumina_NlaIII_expression_Adapter_1
|
37
|
+
ACAGGTTCAGAGTTCTACAGTCCGACATG
|
38
|
+
>Illumina_NlaIII_expression_Adapter_2
|
39
|
+
CAAGCAGAAGACGGCATACGA
|
40
|
+
>Illumina_NlaIII_expression_PCR_Primer_1
|
41
|
+
CAAGCAGAAGACGGCATACGA
|
42
|
+
>Illumina_NlaIII_expression_PCR_Primer_2
|
43
|
+
AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
|
44
|
+
>Illumina_NlaIII_expression_Sequencing_Primer
|
45
|
+
CCGACAGGTTCAGAGTTCTACAGTCCGACATG
|
46
|
+
|
47
|
+
>Illumina_Small_RNA_Adapter_1
|
48
|
+
GTTCAGAGTTCTACAGTCCGACGATC
|
49
|
+
>Illumina_Small_RNA_Adapter_2
|
50
|
+
TCGTATGCCGTCTTCTGCTTGT
|
51
|
+
>Illumina_Small_RNA_RT_Primer
|
52
|
+
CAAGCAGAAGACGGCATACGA
|
53
|
+
>Illumina_Small_RNA_PCR_Primer_1
|
54
|
+
CAAGCAGAAGACGGCATACGA
|
55
|
+
>Illumina_Small_RNA_PCR_Primer_2
|
56
|
+
AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
|
57
|
+
>Illumina_Small_RNA_Sequencing_Primer
|
58
|
+
CGACAGGTTCAGAGTTCTACAGTCCGACGATC
|
59
|
+
|
60
|
+
>Illumina_Multiplexing_Adapter_1
|
61
|
+
GATCGGAAGAGCACACGTCT
|
62
|
+
>Illumina_Multiplexing_Adapter_2
|
63
|
+
ACACTCTTTCCCTACACGACGCTCTTCCGATCT
|
64
|
+
>Illumina_Multiplexing_PCR_Primer_1.01
|
65
|
+
AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
|
66
|
+
>Illumina_Multiplexing_PCR_Primer_2.01
|
67
|
+
GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
|
68
|
+
>Illumina_Multiplexing_Read1_Sequencing_Primer
|
69
|
+
ACACTCTTTCCCTACACGACGCTCTTCCGATCT
|
70
|
+
>Illumina_Multiplexing_Index_Sequencing_Primer
|
71
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCAC
|
72
|
+
>Illumina_Multiplexing_Read2_Sequencing_Primer
|
73
|
+
GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
|
74
|
+
|
75
|
+
>Illumina_PCR_Primer_Index_1
|
76
|
+
CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTC
|
77
|
+
>Illumina_PCR_Primer_Index_2
|
78
|
+
CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTC
|
79
|
+
>Illumina_PCR_Primer_Index_3
|
80
|
+
CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTC
|
81
|
+
>Illumina_PCR_Primer_Index_4
|
82
|
+
CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTC
|
83
|
+
>Illumina_PCR_Primer_Index_5
|
84
|
+
CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTC
|
85
|
+
>Illumina_PCR_Primer_Index_6
|
86
|
+
CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTC
|
87
|
+
>Illumina_PCR_Primer_Index_7
|
88
|
+
CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTC
|
89
|
+
>Illumina_PCR_Primer_Index_8
|
90
|
+
CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTC
|
91
|
+
>Illumina_PCR_Primer_Index_9
|
92
|
+
CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTC
|
93
|
+
>Illumina_PCR_Primer_Index_10
|
94
|
+
CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTC
|
95
|
+
>Illumina_PCR_Primer_Index_11
|
96
|
+
CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTC
|
97
|
+
>Illumina_PCR_Primer_Index_12
|
98
|
+
CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTC
|
99
|
+
|
100
|
+
>Illumina_DpnII_Gex_Adapter_1
|
101
|
+
GATCGTCGGACTGTAGAACTCTGAAC
|
102
|
+
>Illumina_DpnII_Gex_Adapter_1.01
|
103
|
+
ACAGGTTCAGAGTTCTACAGTCCGAC
|
104
|
+
>Illumina_DpnII_Gex_Adapter_2
|
105
|
+
CAAGCAGAAGACGGCATACGA
|
106
|
+
>Illumina_DpnII_Gex_Adapter_2.01
|
107
|
+
TCGTATGCCGTCTTCTGCTTG
|
108
|
+
>Illumina_DpnII_Gex_PCR_Primer_1
|
109
|
+
CAAGCAGAAGACGGCATACGA
|
110
|
+
>Illumina_DpnII_Gex_PCR_Primer_2
|
111
|
+
AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
|
112
|
+
>Illumina_DpnII_Gex_Sequencing_Primer
|
113
|
+
CGACAGGTTCAGAGTTCTACAGTCCGACGATC
|
114
|
+
|
115
|
+
>Illumina_NlaIII_Gex_Adapter_1.01
|
116
|
+
TCGGACTGTAGAACTCTGAAC
|
117
|
+
>Illumina_NlaIII_Gex_Adapter_1.02
|
118
|
+
ACAGGTTCAGAGTTCTACAGTCCGACATG
|
119
|
+
>Illumina_NlaIII_Gex_Adapter_2.01
|
120
|
+
CAAGCAGAAGACGGCATACGA
|
121
|
+
>Illumina_NlaIII_Gex_Adapter_2.02
|
122
|
+
TCGTATGCCGTCTTCTGCTTG
|
123
|
+
>Illumina_NlaIII_Gex_PCR_Primer_1
|
124
|
+
CAAGCAGAAGACGGCATACGA
|
125
|
+
>Illumina_NlaIII_Gex_PCR_Primer_2
|
126
|
+
AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
|
127
|
+
>Illumina_NlaIII_Gex_Sequencing_Primer
|
128
|
+
CCGACAGGTTCAGAGTTCTACAGTCCGACATG
|
129
|
+
|
130
|
+
>Illumina_Small_RNA_RT_Primer
|
131
|
+
CAAGCAGAAGACGGCATACGA
|
132
|
+
>Illumina_5p_RNA_Adapter
|
133
|
+
GTTCAGAGTTCTACAGTCCGACGATC
|
134
|
+
>Illumina_RNA_Adapter1
|
135
|
+
TCGTATGCCGTCTTCTGCTTGT
|
136
|
+
|
137
|
+
>Illumina_Small_RNA_3p_Adapter_1
|
138
|
+
ATCTCGTATGCCGTCTTCTGCTTG
|
139
|
+
>Illumina_Small_RNA_PCR_Primer_1
|
140
|
+
CAAGCAGAAGACGGCATACGA
|
141
|
+
>Illumina_Small_RNA_PCR_Primer_2
|
142
|
+
AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
|
143
|
+
>Illumina_Small_RNA_Sequencing_Primer
|
144
|
+
CGACAGGTTCAGAGTTCTACAGTCCGACGATC
|
145
|
+
|
146
|
+
>TruSeq_Universal_Adapter
|
147
|
+
AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
|
148
|
+
>TruSeq_Adapter_Index_1
|
149
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG
|
150
|
+
>TruSeq_Adapter_Index_2
|
151
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG
|
152
|
+
>TruSeq_Adapter_Index_3
|
153
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG
|
154
|
+
>TruSeq_Adapter_Index_4
|
155
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG
|
156
|
+
>TruSeq_Adapter_Index_5
|
157
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG
|
158
|
+
>TruSeq_Adapter_Index_6
|
159
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG
|
160
|
+
>TruSeq_Adapter_Index_7
|
161
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG
|
162
|
+
>TruSeq_Adapter_Index_8
|
163
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG
|
164
|
+
>TruSeq_Adapter_Index_9
|
165
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG
|
166
|
+
>TruSeq_Adapter_Index_10
|
167
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG
|
168
|
+
>TruSeq_Adapter_Index_11
|
169
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG
|
170
|
+
>TruSeq_Adapter_Index_12
|
171
|
+
GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG
|
172
|
+
|
173
|
+
>Illumina_RNA_RT_Primer
|
174
|
+
GCCTTGGCACCCGAGAATTCCA
|
175
|
+
>Illumina_RNA_PCR_Primer
|
176
|
+
AATGATACGGCGACCACCGAGATCTACACGTTCAGAGTTCTACAGTCCGA
|
177
|
+
|
178
|
+
>RNA_PCR_Primer_Index_1
|
179
|
+
CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
180
|
+
>RNA_PCR_Primer_Index_2
|
181
|
+
CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
182
|
+
>RNA_PCR_Primer_Index_3
|
183
|
+
CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
184
|
+
>RNA_PCR_Primer_Index_4
|
185
|
+
CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
186
|
+
>RNA_PCR_Primer_Index_5
|
187
|
+
CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
188
|
+
>RNA_PCR_Primer_Index_6
|
189
|
+
CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
190
|
+
>RNA_PCR_Primer_Index_7
|
191
|
+
CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
192
|
+
>RNA_PCR_Primer_Index_8
|
193
|
+
CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
194
|
+
>RNA_PCR_Primer_Index_9
|
195
|
+
CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
196
|
+
>RNA_PCR_Primer_Index_10
|
197
|
+
CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
198
|
+
>RNA_PCR_Primer_Index_11
|
199
|
+
CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
200
|
+
>RNA_PCR_Primer_Index_12
|
201
|
+
CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
202
|
+
>RNA_PCR_Primer_Index_13
|
203
|
+
CAAGCAGAAGACGGCATACGAGATTTGACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
204
|
+
>RNA_PCR_Primer_Index_14
|
205
|
+
CAAGCAGAAGACGGCATACGAGATGGAACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
206
|
+
>RNA_PCR_Primer_Index_15
|
207
|
+
CAAGCAGAAGACGGCATACGAGATTGACATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
208
|
+
>RNA_PCR_Primer_Index_16
|
209
|
+
CAAGCAGAAGACGGCATACGAGATGGACGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
210
|
+
>RNA_PCR_Primer_Index_17
|
211
|
+
CAAGCAGAAGACGGCATACGAGATCTCTACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
212
|
+
>RNA_PCR_Primer_Index_18
|
213
|
+
CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
214
|
+
>RNA_PCR_Primer_Index_19
|
215
|
+
CAAGCAGAAGACGGCATACGAGATTTTCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
216
|
+
>RNA_PCR_Primer_Index_20
|
217
|
+
CAAGCAGAAGACGGCATACGAGATGGCCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
218
|
+
>RNA_PCR_Primer_Index_21
|
219
|
+
CAAGCAGAAGACGGCATACGAGATCGAAACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
220
|
+
>RNA_PCR_Primer_Index_22
|
221
|
+
CAAGCAGAAGACGGCATACGAGATCGTACGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
222
|
+
>RNA_PCR_Primer_Index_23
|
223
|
+
CAAGCAGAAGACGGCATACGAGATCCACTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
224
|
+
>RNA_PCR_Primer_Index_24
|
225
|
+
CAAGCAGAAGACGGCATACGAGATGCTACCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
226
|
+
>RNA_PCR_Primer_Index_25
|
227
|
+
CAAGCAGAAGACGGCATACGAGATATCAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
228
|
+
>RNA_PCR_Primer_Index_26
|
229
|
+
CAAGCAGAAGACGGCATACGAGATGCTCATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
230
|
+
>RNA_PCR_Primer_Index_27
|
231
|
+
CAAGCAGAAGACGGCATACGAGATAGGAATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
232
|
+
>RNA_PCR_Primer_Index_28
|
233
|
+
CAAGCAGAAGACGGCATACGAGATCTTTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
234
|
+
>RNA_PCR_Primer_Index_29
|
235
|
+
CAAGCAGAAGACGGCATACGAGATTAGTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
236
|
+
>RNA_PCR_Primer_Index_30
|
237
|
+
CAAGCAGAAGACGGCATACGAGATCCGGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
238
|
+
>RNA_PCR_Primer_Index_31
|
239
|
+
CAAGCAGAAGACGGCATACGAGATATCGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
240
|
+
>RNA_PCR_Primer_Index_32
|
241
|
+
CAAGCAGAAGACGGCATACGAGATTGAGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
242
|
+
>RNA_PCR_Primer_Index_33
|
243
|
+
CAAGCAGAAGACGGCATACGAGATCGCCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
244
|
+
>RNA_PCR_Primer_Index_34
|
245
|
+
CAAGCAGAAGACGGCATACGAGATGCCATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
246
|
+
>RNA_PCR_Primer_Index_35
|
247
|
+
CAAGCAGAAGACGGCATACGAGATAAAATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
248
|
+
>RNA_PCR_Primer_Index_36
|
249
|
+
CAAGCAGAAGACGGCATACGAGATTGTTGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
250
|
+
>RNA_PCR_Primer_Index_37
|
251
|
+
CAAGCAGAAGACGGCATACGAGATATTCCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
252
|
+
>RNA_PCR_Primer_Index_38
|
253
|
+
CAAGCAGAAGACGGCATACGAGATAGCTAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
254
|
+
>RNA_PCR_Primer_Index_39
|
255
|
+
CAAGCAGAAGACGGCATACGAGATGTATAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
256
|
+
>RNA_PCR_Primer_Index_40
|
257
|
+
CAAGCAGAAGACGGCATACGAGATTCTGAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
258
|
+
>RNA_PCR_Primer_Index_41
|
259
|
+
CAAGCAGAAGACGGCATACGAGATGTCGTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
260
|
+
>RNA_PCR_Primer_Index_42
|
261
|
+
CAAGCAGAAGACGGCATACGAGATCGATTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
262
|
+
>RNA_PCR_Primer_Index_43
|
263
|
+
CAAGCAGAAGACGGCATACGAGATGCTGTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
264
|
+
>RNA_PCR_Primer_Index_44
|
265
|
+
CAAGCAGAAGACGGCATACGAGATATTATAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
266
|
+
>RNA_PCR_Primer_Index_45
|
267
|
+
CAAGCAGAAGACGGCATACGAGATGAATGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
268
|
+
>RNA_PCR_Primer_Index_46
|
269
|
+
CAAGCAGAAGACGGCATACGAGATTCGGGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
270
|
+
>RNA_PCR_Primer_Index_47
|
271
|
+
CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
272
|
+
>RNA_PCR_Primer_Index_48
|
273
|
+
CAAGCAGAAGACGGCATACGAGATTGCCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
|
274
|
+
|
275
|
+
>ABI_Dynabead_EcoP_Oligo
|
276
|
+
CTGATCTAGAGGTACCGGATCCCAGCAGT
|
277
|
+
>ABI_Solid3_Adapter_A
|
278
|
+
CTGCCCCGGGTTCCTCATTCTCTCAGCAGCATG
|
279
|
+
>ABI_Solid3_Adapter_B
|
280
|
+
CCACTACGCCTCCGCTTTCCTCTCTATGGGCAGTCGGTGAT
|
281
|
+
>ABI_Solid3_5_AMP_Primer
|
282
|
+
CCACTACGCCTCCGCTTTCCTCTCTATG
|
283
|
+
>ABI_Solid3_3_AMP_Primer
|
284
|
+
CTGCCCCGGGTTCCTCATTCT
|
285
|
+
>ABI_Solid3_EF1_alpha_Sense_Primer
|
286
|
+
CATGTGTGTTGAGAGCTTC
|
287
|
+
>ABI_Solid3_EF1_alpha_Antisense_Primer
|
288
|
+
GAAAACCAAAGTGGTCCAC
|
289
|
+
>ABI_Solid3_GAPDH_Forward_Primer
|
290
|
+
TTAGCACCCCTGGCCAAGG
|
291
|
+
>ABI_Solid3_GAPDH_Reverse_Primer
|
292
|
+
CTTACTCCTTGGAGGCCATG
|
293
|
+
>TruSeq2_SE
|
294
|
+
AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG
|
295
|
+
>TruSeq2_PE_f
|
296
|
+
AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
|
297
|
+
>TruSeq2_PE_r
|
298
|
+
AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
|
299
|
+
>TruSeq3_IndexedAdapter
|
300
|
+
AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
|
301
|
+
>TruSeq3_UniversalAdapter
|
302
|
+
AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA
|
data/utils/mytaxa_scan.R
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
|
2
|
+
# Plot a MyTaxa scan from a table of per-window taxonomic frequencies.
#
# Arguments:
#   wintax  Path to a tab-delimited table without header (lines starting with
#           '#' are skipped by read.table). Column 1 holds the taxon labels
#           (used as row names; the literal label "NA" marks unclassified
#           genes), column 2 the genome-wide frequencies, and every further
#           column the frequencies within one window.
#   col     Base colour palette for the taxa.
#   main    Title of the figure.
#
# Side effects: draws on the current graphics device (changing `layout` and
# `par(mar)` without restoring them) and, if outlier windows are found, writes
# their indexes to the file "<wintax>.regions".
#
# Returns the indexes of the windows whose distance to the genome profile is
# above the 95% quantile, or NULL if there are none or if there are too few
# windows to plot.
mytaxa.scan <- function(
      wintax,
      col=c('#4dbeee','#7e2f8e','#0072bd','#d95319',
            '#edb120','#77ac30','#a2142f'),
      main='MyTaxa scan'){
   a <- read.table(wintax, sep='\t', header=FALSE, row.names=1,
      na.strings='', quote='');
   # Make sure the unclassified row exists so it can always be set aside
   if(! "NA" %in% rownames(a)) a["NA", ] <- 0
   is.unclass <- rownames(a)=="NA"
   # b: window profiles only (no genome column, no unclassified row)
   b <- as.matrix(a[!is.unclass, -1]);
   if(ncol(b) <= 1){
      plot(1, type='n', bty='n', axes=FALSE);
      legend('center', legend='Insufficient data');
      return(c());
   }

   layout(matrix(c(6,6,1,4,2,3,5,5), byrow=TRUE, ncol=2),
      widths=c(7,1), heights=c(1/4,1,2,3));

   #::: DISTANCES
   par(mar=c(1,5,2,0)+0.1);
   # Hellinger distance between each window profile and the genome profile
   d <- apply(a[,-1], 2,
      function(x,y) sqrt(sum((sqrt(x)-sqrt(y))^2)/2), y=a[,1]);
   d.thr <- quantile(d, probs=0.95, names=FALSE, na.rm=TRUE)
   n.win <- length(d)
   plot(1, xlim=c(0, n.win+1), ylim=c(0,1), xlab='', xaxs='i', xaxt='n',
      type='n', pch=19, cex=1/2, col=grey(0.3), bty='n', ylab='Signal', las=1);
   rect(seq_len(n.win)-1, 0, seq_len(n.win), d,
      col=ifelse(d>d.thr, grey(0.3), grey(0.5)), border='NA');

   #::: WINDOWS BARPLOT
   par(mar=c(0,5,0,0)+0.1);
   plot(1, type='n', xlim=c(0,ncol(b)+1), xaxs='i', ylim=c(0,1.2),
      yaxs='i', xlab='', ylab='Frequency', bty='n', xaxt='n', yaxt='n');
   axis(2, at=seq(0,1,by=0.2), las=1);
   # Regions (outliers). which() ignores NA distances instead of failing on
   # them; the result is kept as NULL when empty, as callers received before.
   regs <- unname(which(d > d.thr));
   if(length(regs)==0) regs <- c();
   if(length(regs)>0){
      x <- regs-0.5;
      # Alternate the label height so neighbouring markers do not overlap
      y <- rep(1.05,length(regs)) + (seq_along(regs) %% 2)/10;
      points(x, y, pch=19, cex=3, col='darkred');
      arrows(x0=x, y0=0.01, y1=y, col='darkred', length=0);
      text(x, y, seq_along(regs), col='white', font=2, cex=3/4);
      write.table(regs, paste(wintax,".regions",sep=""), col.names=FALSE,
         row.names=FALSE, quote=FALSE)
   }
   # Bars: stack one taxon at a time on top of the running heights `h`
   h <- rep(0, ncol(b));
   all_cols <- c();
   for(i in seq_len(nrow(b))){
      # Cycle through all but the last palette entry
      i.col <- 1+((i-1) %% (length(col)-1));
      hn <- h + as.numeric(b[i, ]);
      filled <- which(b[i, ] > 0);
      if(length(filled) > 0)
         rect(filled-1, h[filled], filled, hn[filled], col=col[i.col],
            border=NA);
      all_cols <- c(all_cols, col[i.col]);
      # Once the cycle is exhausted, rescale the whole palette (darker) so
      # the next round of taxa gets distinguishable colours
      if(i.col+1 == length(col)){
         k <- col2rgb(col);
         col[] <- rgb(k[1,], k[2,], k[3,], maxColorValue=256*1.3);
      }
      h <- hn;
   }

   #::: GENOME PROFILE
   par(mar=c(0,0,0,2)+0.1);
   plot(1, type='n', xlim=c(0,1), xaxs='i', ylim=c(0,1.2), yaxs='i',
      xlab='', ylab='', bty='n', xaxt='n', yaxt='n');
   # `all_cols` has one colour per classified taxon (rows of b). Map it onto
   # the rows of `a`, leaving the unclassified row uncoloured, so each taxon
   # has the same colour here as in the window bars and the legend.
   genome.col <- rep(NA, nrow(a));
   if(length(all_cols) > 0) genome.col[!is.unclass] <- all_cols;
   rect(0, cumsum(c(0,a[-nrow(a),1])), 1, cumsum(a[, 1]),
      col=genome.col, border=NA);
   text(0.5, 1.1, 'Genome', font=2, cex=1.5, col='darkred');

   #::: DISTANCES BOXPLOT
   par(mar=c(1,0,2,2)+0.1);
   boxplot(d, ylim=c(0,1), yaxs='i', axes=FALSE, col=grey(0.3), pch=19);

   #::: LEGEND
   par(mar=c(0,2,0,2)+0.1);
   plot(1, type='n', bty='n', xlim=c(0,1),
      ylim=c(0,1), xaxs='i', yaxs='i', axes=FALSE);
   # Only the last rank of each '::'-separated label is shown
   legend('top', pt.bg=all_cols, col=grey(0.3), pch=22,
      legend=gsub('.*::','',rownames(b)), ncol=5, cex=3/4, bty='n');

   #::: MAIN
   plot(1, type='n', bty='n', xlim=c(0,1), ylim=c(0,1), axes=FALSE);
   text(.5,.5,main);

   return(regs);
}
|
89
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Summarizes MyTaxa gene classifications in windows of consecutive genes.
#
# Arguments (all required):
#   1. FastA file: gene IDs are taken from its defline (text after '>' up to
#      the first whitespace), in file order.
#   2. MyTaxa file: read as pairs of lines; the first tab-delimited field of
#      each odd line is the gene ID, and the following line is its taxonomy.
#   3. Data output: path of the table of frequencies (taxa in rows; the
#      genome and each window in columns). The gene IDs of each window are
#      also saved, one window per line, in "<Data output>.genes".

abort "
Usage:
   #{$0} {FastA file} {MyTaxa file} {Data output}

" if ARGV[2].nil?

begin
   # Get arguments
   faa, mytaxa, outdata = ARGV
   winsize = 10

   # Extract gene IDs (genes without classification remain as "NA")
   ids = File.foreach(faa).grep(/^>/).map do |dl|
      dl.chomp.sub(/^>/, "").sub(/\s.*/, "")
   end
   tax = Hash[ids.map{ |id| [id, "NA"] }]

   # Get MyTaxa distributions
   gene = nil
   File.foreach(mytaxa).each_with_index do |ln, idx|
      ln = ln.chomp
      if idx.even?
         # Odd lines (1-based) carry the gene ID in the first field
         gene = ln.split(/\t/).first
      else
         # Even lines carry the taxonomy: drop the <...> tags and use '::'
         # instead of ';' as rank separator
         tax[gene] = ln.gsub(/<[^>]+>/, "").gsub(/;/, "::")
      end
   end

   # Taxa sorted by decreasing number of genes. The counts are computed once
   # instead of re-counting all values on every comparison.
   counts = Hash.new(0)
   tax.each_value{ |t| counts[t] += 1 }
   all_tax = tax.values.uniq.sort{ |x, y| counts[y] <=> counts[x] }

   # Estimate Windows and save gene IDs
   c = [all_tax.map{ |t| counts[t] }]
   # Float division: with integer division `ceil` has no effect, and the
   # genes of the last (incomplete) window would be silently ignored.
   n_wins = (ids.size.to_f / winsize).ceil
   File.open(outdata + ".genes", "w") do |fh|
      n_wins.times do |win|
         win_ids = ids[win*winsize, winsize]
         win_t = tax.values_at(*win_ids)
         fh.puts win_ids.join("\t")
         c << all_tax.map{ |t| win_t.count(t) }
      end
   end
   # Turn the counts of each column (genome, then windows) into frequencies
   freqs = c.map do |col|
      total = col.inject(:+)
      col.map{ |cell| cell.to_f/total }
   end

   # Save window profiles
   File.open(outdata, "w") do |fh|
      fh.puts "# Data derived from #{mytaxa}, with #{winsize}-genes windows"
      fh.puts "# " + (["Tax-label", "Genome"] +
         (1 .. n_wins).map{ |i| "Win_#{i}" }).join("\t")
      all_tax.each_with_index do |t, row|
         fh.puts(([t] + freqs.map{ |col| col[row] }).join("\t"))
      end
   end
rescue => err
   # NOTE(review): the error is reported but the script still exits with
   # status 0 -- confirm whether callers rely on that before changing it.
   $stderr.puts "Exception: #{err}\n\n"
   err.backtrace.each { |l| $stderr.puts l + "\n" }
   err
end
|
57
|
+
|
58
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
Software Test executable Website Notes
|
2
|
+
-------- --------------- ------- -----
|
3
|
+
Enve-omics scripts FastQ.tag.rb http://github.com/lmrodriguezr/enveomics All the collection must be present
|
4
|
+
SolexaQA++ SolexaQA++ http://solexaqa.sourceforge.net Required version: v3.1.3+
|
5
|
+
Scythe scythe https://github.com/vsbuffalo/scythe Required version: 0.991+
|
6
|
+
FastQC fastqc http://www.bioinformatics.babraham.ac.uk/projects/fastqc
|
7
|
+
IDBA idba_ud http://i.cs.hku.hk/~alse/hkubrg/projects/idba
|
8
|
+
MetaGeneMark gmhmmp http://exon.gatech.edu/genemark/license_download.cgi The folder must contain the key and the scripts
|
9
|
+
HMMer 3.0+ hmmsearch http://hmmer.janelia.org/software
|
10
|
+
NCBI BLAST+ blastp ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST
|
11
|
+
R R http://www.r-project.org/
|
12
|
+
SQLite3 sqlite3 https://www.sqlite.org/
|
13
|
+
RAxML (pthreads) raxmlHPC-PTHREADS http://sco.h-its.org/exelixis/web/software/raxml/index.html
|
14
|
+
MCL mcl http://micans.org/mcl/
|
15
|
+
DIAMOND diamond http://ab.inf.uni-tuebingen.de/software/diamond Required version: v0.7.9+
|
16
|
+
MyTaxa MyTaxa http://enve-omics.ce.gatech.edu/mytaxa The folder must contain the db and utils dirs, and the AllGenomes.faa BLAST database
|
17
|
+
Krona ktImportText https://github.com/marbl/Krona/wiki
|
18
|
+
Barrnap barrnap http://www.vicbioinformatics.com/software.barrnap.shtml
|
19
|
+
bedtools bedtools http://bedtools.readthedocs.org/en/latest/
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# @author Luis M. Rodriguez-R
|
4
|
+
# @update Jan-15-2016
|
5
|
+
# @license artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
# Make the "lib" folder next to this script available to `require`.
$LOAD_PATH.push File.expand_path(File.dirname(__FILE__) + "/lib")

# The only argument is the directory containing the classification files.
dir = ARGV.shift
abort "Usage: #{$0} <classif.dir>" unless dir
|
10
|
+
|
11
|
+
# Reads the classification in +dir+ and, recursively, in its subclades.
#
# Each line of "miga-project.1.classif" in +dir+ is tab-delimited: the first
# field is the dataset name and the second its class. For every class +i+
# found, the folder "miga-project.1.sc-i" inside +dir+ is read in the same
# way if it exists, so the classes of each dataset accumulate from the
# outermost to the innermost level.
#
# dir::     Folder containing "miga-project.1.classif".
# classif:: Hash to extend (dataset name => Array of classes).
#
# Returns +classif+.
def read_classif(dir, classif={})
  klass = []
  # The block form closes the file even if an exception is raised
  File.foreach(File.expand_path("miga-project.1.classif", dir)) do |ln|
    r = ln.chomp.split("\t")
    next if r.empty? # Blank lines carry no dataset
    classif[r[0]] ||= []
    classif[r[0]] << r[1]
    klass[r[1].to_i] = r[1]
  end
  # +klass+ is indexed by class number, so unused positions are nil: skip them
  klass.compact.each do |i|
    d = File.expand_path("miga-project.1.sc-#{i}", dir)
    classif = read_classif(d, classif) if Dir.exist? d
  end
  classif
end
|
27
|
+
|
28
|
+
# Builds a parenthesized tree string from a classification.
#
# classif:: Hash of dataset name => Array of classes, one per level.
# col::     Level (index in the Arrays of classes) used to split the datasets.
#
# Datasets are grouped by their class at level +col+, and each group is
# resolved recursively with the next level and tagged as "subtree[class]".
# When at most one class is present at this level, the dataset names
# themselves are the nodes. Returns nil if there are no nodes, the node itself
# if there is only one, and "(node,node,...)" otherwise.
def print_tree(classif, col=0)
  classes = classif.values.map { |v| v[col] }.compact.uniq
  nodes =
    if classes.size > 1
      classes.map do |cl|
        members = classif.select { |_, v| v[col] == cl }
        subtree = print_tree(members, col + 1)
        subtree.nil? ? nil : "#{subtree}[#{cl}]"
      end.compact
    else
      classif.keys
    end
  return nil if nodes.empty?
  return nodes.first if nodes.size == 1
  "(#{nodes.join(",")})"
end
|
42
|
+
|
43
|
+
# Print one row per dataset to STDOUT: its name followed by its class at every
# level, padded with "0" up to the deepest level found.
c = read_classif(dir)
max_depth = c.values.map{ |i| i.count }.max
c.each do |k,v|
  puts(([k] + v + ["0"]*(max_depth-v.count)).join("\t"))
end
# The tree goes to STDERR. Interpolation (instead of `+`) keeps this from
# failing when the classification is empty and print_tree returns nil.
$stderr.puts "#{print_tree(c)};"
|
data/utils/subclades.R
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
library(enveomics.R)
|
2
|
+
library(ape)
|
3
|
+
library(ggdendro)
|
4
|
+
library(ggplot2)
|
5
|
+
library(grid)
|
6
|
+
library(gridExtra)
|
7
|
+
library(cluster)
|
8
|
+
library(dendextend)
|
9
|
+
library(vegan)
|
10
|
+
library(scatterplot3d)
|
11
|
+
|
12
|
+
# Main function
|
13
|
+
# Main function
#
# Clusters datasets by ANI and recursively explores the resulting subclades.
#
# Arguments:
#   ani_file   Path to a (possibly gzipped) tab-delimited table with header,
#              including the columns `a`, `b` and `value` (ANI in percent).
#              If missing, `ani` is used instead.
#   out_base   Prefix of the output files.
#   thr        Passed as `parallel` to metaMDS.
#   ani        Data with the same columns as `ani_file`; only used when
#              `ani_file` is missing.
#   tree_file  Path of the Newick file with the hierarchical clustering.
#
# Output files: `tree_file`, "<out_base>.pdf" and, for each number of
# clusters evaluated (i), "<out_base>.i.medoids" and "<out_base>.i.classif"
# (dataset, cluster, medoid of the cluster, and ANI to that medoid). Each
# cluster of the first clustering with more than 5 datasets is analyzed
# recursively in the folder "<out_base>.1.sc-<cluster>".
#
# Returns NULL.
#
# NOTE(review): the silhouette step below looks like it needs at least four
# datasets to evaluate any number of clusters -- confirm how smaller inputs
# should be handled.
subclades <- function(ani_file, out_base, thr=1, ani=c(),
      tree_file='miga-project.ani.nwk'){
  # Get ANI distances
  cat("====", out_base, "\n")
  if(missing(ani_file)){
    a <- as.data.frame(ani)
  } else {
    a <- read.table(gzfile(ani_file), sep='\t', header=TRUE, as.is=TRUE)
  }
  if(nrow(a)==0){
    # Nothing to cluster: leave a placeholder report and empty result files
    pdf(paste(out_base,'.pdf',sep=''), 7, 12)
    plot(1, type='n', axes=FALSE)
    legend('center','No ANI data',bty='n')
    dev.off()
    file.create(paste(out_base,'.1.classif',sep=''))
    file.create(paste(out_base,'.1.medoids',sep=''))
    return(NULL)
  }
  # Distances are expressed as 1 - ANI/100
  ani.d <- enve.df2dist(cbind(a$a, a$b, 1-a$value/100), default.d=0.3)
  ani.hc <- hclust(ani.d, method='ward.D2')
  write.tree(as.phylo(ani.hc), tree_file)

  # Silhouette
  k <- 2:(length(labels(ani.d))-1)
  sil <- vapply(k,
    function(x) summary(silhouette(pam(ani.d, x)))$avg.width, numeric(1))
  # For each inner k: how much its average silhouette width exceeds the mean
  # of its two neighbours (k-1 and k+1)
  ds <- 10^(sil[-c(1,length(sil))] -
    (sil[-length(sil)+c(0,1)] + sil[-c(1,2)])/2)
  top.n <- head(k[order(c(-Inf,ds,-Inf), decreasing=TRUE)], n=6)

  # Save "ANI-types"
  ani.types <- c()
  ani.medoids <- list()
  for(i in seq_along(top.n)){
    ani.cl <- pam(ani.d, top.n[i])
    ani.types <- cbind(ani.types, ani.cl$clustering)
    ani.medoids[[ i ]] <- ani.cl$medoids
  }

  # Generate graphic reports
  pdf(paste(out_base,'.pdf',sep=''), 7, 12)
  plotClusterAndMetadata(as.dendrogram(ani.hc), ani.types, main='ANI types')
  ani.mds <- metaMDS(ani.d, k=3, autotransform=FALSE, parallel=thr,
    wascores=FALSE)
  layout(matrix(1:6, ncol=2))
  for(i in seq_along(top.n)){
    s3d <- scatterplot3d(ani.mds$points, pch=19, type='h',
      color=ggplotColours(top.n[i], alpha=1/2)[ani.types[,i]],
      cex.symbols=1/2, box=FALSE, lty.hplot=3,
      main=paste('NMDS of ANI distances with', top.n[i] ,'clusters'),
      angle=80, scale.y=3/2, las=2, xlab='', ylab='', zlab='')
    for(cl in seq_len(top.n[i])){
      col <- ggplotColours(top.n[i])[cl]
      med <- s3d$xyz.convert(matrix(ani.mds$points[ ani.medoids[[i]][cl] , ],
        ncol=3))
      if(sum(ani.types[,i]==cl)>1){
        # Link every member of the cluster to its medoid
        val <- s3d$xyz.convert(matrix(ani.mds$points[ ani.types[,i]==cl , ],
          ncol=3))
        arrows(x0=med$x, y0=med$y, x1=val$x, y1=val$y, length=0, col=col)
      }
      points(med, col=col, pch=19, cex=3/2)
      text(med, labels=cl, col='white', cex=2/3)
    }
  }
  dev.off()

  # Save results
  ani.m <- as.matrix(ani.d)
  for(i in seq_along(top.n)){
    write.table(ani.medoids[[i]], paste(out_base,i,'medoids',sep='.'),
      quote=FALSE, col.names=FALSE, row.names=FALSE)
    classif <- cbind(rownames(ani.types), ani.types[,i],
      ani.medoids[[i]][ ani.types[,i] ], NA)
    # ANI (in percent) between each dataset and the medoid of its cluster.
    # The distances are 1 - ANI/100, hence ANI = 100 * (1 - distance).
    classif[,4] <- 100 * (1 - ani.m[cbind(classif[,1], classif[,3])])
    write.table(classif, paste(out_base,i,'classif',sep='.'),
      quote=FALSE, col.names=FALSE, row.names=FALSE, sep='\t')
  }

  # Explore subclades
  for(i in seq_len(top.n[1])){
    medoid <- ani.medoids[[1]][i]
    ds_f <- rownames(ani.types)[ ani.types[,1]==i ]
    cat("Analyzing subclade", i, "with medoid:", medoid, "\n")
    cat(" ds_f: ", ds_f, "\n")
    if(length(ds_f) > 5){
      a_f <- a[ (a$a %in% ds_f) & (a$b %in% ds_f), ]
      sc_dir <- paste(out_base,'.1.sc-',i,sep='')
      dir.create(sc_dir)
      write.table(ds_f,
        paste(sc_dir,'/miga-project.all',sep=''),
        quote=FALSE, col.names=FALSE, row.names=FALSE)
      cat(" looking for subclades within: ",
        out_base, ".1.sc-", i, "\n", sep="")
      # The tree of the subclade is saved within its own folder, so it does
      # not overwrite the tree of the current level
      subclades(
        out_base=paste(sc_dir,'/miga-project',sep=''),
        thr=thr, ani=a_f,
        tree_file=paste(sc_dir,'/miga-project.ani.nwk',sep=''))
    }
  }
}
|
108
|
+
|
109
|
+
# Ancillary functions
|
110
|
+
# Plots a dendrogram next to one column of tiles per metadata variable.
#
# Arguments:
#   c          Dendrogram; its labels set the order of the rows.
#   m          Matrix of metadata, with rows named after the labels of `c`
#              and one column per variable.
#   addLabels  If TRUE, an extra column with the labels of `c` is appended.
#   main       Title, passed to grid.arrange as `main`.
#   type       Type of each column of `m` ('factor', 'numeric' or 'label'),
#              or a single value to use for all columns.
#
# Draws the panels with grid.arrange and returns the list of arguments passed
# to it (grobs plus `nrow`, `widths` and `main`). Stops with an error on any
# unsupported type.
plotClusterAndMetadata <- function(c,m,addLabels=TRUE,main='',type='factor'){
  # Theme shared by all the tile panels. It is built on demand so nothing is
  # evaluated before the types are checked.
  tile_theme <- function(){
    theme(axis.title=element_blank(), panel.margin=unit(1,'points'),
      plot.margin=unit(c(40,-12,20,-12),'points'),
      axis.ticks=element_blank(), axis.text=element_blank(),
      legend.position="none")
  }

  ps <- list()
  # Empty panel on the left
  ps[[1]] <- rectGrob(gp=gpar(col="white"))
  if(length(type)==1) type <- rep(type, ncol(m))
  if(addLabels){
    m <- cbind(m, NA)
    m[labels(c),ncol(m)] <- labels(c)
    type[ncol(m)] <- 'label'
  }
  for(i in seq_len(ncol(m))){
    df <- data.frame(lab=factor(labels(c),levels=labels(c)),
      feat=m[labels(c),i])
    if(type[i]=='factor'){
      ps[[i+1]] <- ggplotGrob(ggplot(df, aes(1, lab, fill=factor(feat))) +
        geom_tile() + geom_text(size=3/4, label=df$feat, x=.8) +
        scale_x_continuous(expand=c(0,0)) +
        tile_theme())
    }else if(type[i]=='numeric'){
      ps[[i+1]] <- ggplotGrob(ggplot(df, aes(1,lab,fill=as.numeric(feat))) +
        geom_tile() + geom_text(size=3/4, label=df$feat, x=.8) +
        scale_x_continuous(expand=c(0,0)) +
        tile_theme())
    }else if(type[i]=='label'){
      ps[[i+1]] <- ggplotGrob(ggplot(df, aes(1, lab)) +
        geom_tile(fill='white') + geom_text(size=3/4, label=df$feat, x=.8) +
        tile_theme())
    }else{
      stop('Unsupported type: ', type[i])
    }
  }
  # The dendrogram goes after the last metadata panel. The position is
  # derived from ncol(m) rather than from the loop variable, so it is also
  # right when `m` has no columns.
  ps[[ncol(m)+2]] <- ggplotGrob(
    ggplot(segment(dendro_data(c, type="rectangle"))) +
    geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
    scale_x_continuous(expand=c(0,.5)) +
    coord_flip() + theme_dendro() +
    theme(axis.title=element_blank(), axis.ticks=element_blank(),
      plot.margin=unit(c(40,20,20,ifelse(addLabels,-35,-30)),'points'),
      panel.margin=unit(0,'points'), axis.text=element_blank(),
      legend.position="none"))
  maxHeights <- do.call(grid::unit.pmax,
    lapply(ps, function(x) x$heights[2:5]))
  # NOTE(review): this assigns to the loop-local copy `g`, so the grobs in
  # `ps` are left unchanged. Presumably meant to give all panels the same
  # heights -- confirm the intended layout before changing it.
  for(g in ps) g$heights[2:5] <- as.list(maxHeights)
  ps$nrow <- 1
  ps$widths <- c(0.1,rep(.07,ncol(m)),1)
  ps$main <- main
  do.call(grid.arrange, ps)
  return(ps)
}
|
165
|
+
|
166
|
+
# Generates `n` colours with hues evenly spaced between h[1] and h[2], at
# fixed chroma (100) and luminance (65), and with transparency `alpha`.
ggplotColours <- function(n=6, h=c(0, 360)+15, alpha=1){
  hue_span <- diff(h) %% 360
  # If the range wraps around the full circle, shorten it by one step so the
  # first and the last colours do not coincide
  if(hue_span < 1){
    h[2] <- h[2] - 360/n
  }
  hues <- seq(h[1], h[2], length.out=n)
  hcl(h=hues, c=100, l=65, alpha=alpha)
}
|
170
|
+
|
171
|
+
|