full_lengther_next 0.6.2 → 0.9.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/{README.rdoc → README.md} +0 -0
- data/Rakefile +6 -37
- data/bin/console +14 -0
- data/bin/download_fln_dbs.rb +2 -7
- data/bin/full_lengther_next +85 -6
- data/bin/make_user_db.rb +13 -5
- data/bin/setup +8 -0
- data/full_lengther_next.gemspec +42 -0
- data/lib/full_lengther_next.rb +2 -10
- data/lib/full_lengther_next/artifacts.rb +74 -0
- data/lib/full_lengther_next/{classes/blast_functions.rb → blast_functions.rb} +0 -0
- data/lib/full_lengther_next/{classes/cdhit.rb → cdhit.rb} +0 -0
- data/lib/full_lengther_next/{classes/chimeric_seqs.rb → chimeric_seqs.rb} +0 -0
- data/lib/full_lengther_next/{classes/common_functions.rb → common_functions.rb} +0 -0
- data/lib/full_lengther_next/{classes/exonerate_result.rb → exonerate_result.rb} +0 -0
- data/lib/full_lengther_next/{classes/fl_analysis.rb → fl_analysis.rb} +0 -0
- data/lib/full_lengther_next/{classes/fl_string_utils.rb → fl_string_utils.rb} +0 -0
- data/lib/full_lengther_next/fln_stats.rb +613 -0
- data/lib/full_lengther_next/go_methods.rb +42 -0
- data/lib/full_lengther_next/{classes/handle_db.rb → handle_db.rb} +0 -0
- data/lib/full_lengther_next/mapping.rb +296 -0
- data/lib/full_lengther_next/{classes/my_worker.rb → my_worker.rb} +71 -9
- data/lib/full_lengther_next/{classes/my_worker_EST.rb → my_worker_EST.rb} +0 -0
- data/lib/full_lengther_next/{classes/my_worker_manager_EST.rb → my_worker_manager_EST.rb} +0 -0
- data/lib/full_lengther_next/{classes/my_worker_manager_fln.rb → my_worker_manager_fln.rb} +181 -16
- data/lib/full_lengther_next/{classes/nc_rna.rb → nc_rna.rb} +0 -0
- data/lib/full_lengther_next/{classes/orf.rb → orf.rb} +0 -0
- data/lib/full_lengther_next/{classes/reptrans.rb → reptrans.rb} +9 -5
- data/lib/full_lengther_next/{classes/sequence.rb → sequence.rb} +26 -1
- data/lib/full_lengther_next/{classes/test_code.rb → test_code.rb} +1 -1
- data/lib/full_lengther_next/{classes/types.rb → types.rb} +3 -2
- data/lib/full_lengther_next/{classes/une_los_hit.rb → une_los_hit.rb} +0 -0
- data/lib/full_lengther_next/version.rb +3 -0
- data/lib/full_lengther_next/{classes/warnings.rb → warnings.rb} +0 -0
- data/report_templates/general_summary.erb +140 -0
- data/report_templates/mapping_summary.erb +98 -0
- data/report_templates/reptrans_summary.erb +32 -0
- metadata +112 -134
- data/.gemtest +0 -0
- data/History.txt +0 -32
- data/Manifest.txt +0 -44
- data/PostInstall.txt +0 -6
- data/bin/plot_fln.rb +0 -270
- data/bin/plot_taxonomy.rb +0 -70
- data/lib/expresscanvas.zip +0 -0
- data/lib/full_lengther_next/classes/artifacts.rb +0 -66
- data/lib/full_lengther_next/classes/fln_stats.rb +0 -641
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
- data/test/test_full_lengther_next.rb +0 -11
- data/test/test_helper.rb +0 -3
@@ -1,641 +0,0 @@
|
|
1
|
-
require 'types.rb'
|
2
|
-
|
3
|
-
module FlnStats
|
4
|
-
# Builds a fresh statistics hash with every general-report counter set to 0.
#
# The keys are inserted in the order listed below; write_txt dumps a stats
# hash in iteration order, so the order is part of the text output.
#
# @return [Hash{String => Integer}] a new hash, one zeroed entry per counter
def initialize_stats_hash
  counters = %w[
    input_seqs output_seqs failed
    sequences_>200 sequences_>500 longest_unigene
    good_seqs artifacts misassembled chimeras other_artifacts
    unknown unknown_>200 unknown_>500
    prot_annotated
    complete complete_sure complete_putative
    n_terminal n_terminal_sure n_terminal_putative
    c_terminal c_terminal_sure c_terminal_putative
    internal
    swissprot trembl userdb
    ncrna
    coding coding_sure coding_putative coding_>200 coding_>500
    different_orthologues different_completes
    BA_index
  ]
  counters.each_with_object({}) { |counter, stats| stats[counter] = 0 }
end
|
46
|
-
|
47
|
-
# Extracts the organism (its first two words) from a database hit
# description and increments that organism's counter in +taxonomy+.
#
# Three description layouts are recognised:
# * descriptions containing "OS=": the text between "OS=" and " GN=";
# * descriptions starting with "sp=" or "tr=": the two words that precede
#   one or two parenthesised groups, e.g. "Homo sapiens (Human)";
# * anything else: the text after the first ";" up to the first ".", with
#   non-numeric parenthesised groups removed.
#
# @param name [String] hit description; it is never modified
# @param taxonomy [Hash{String => Integer}] per-organism counters, updated in place
# @return [Integer, nil] the new count for the organism, or nil when no
#   organism could be extracted
def get_taxonomy(name, taxonomy)
  organism =
    if name.include?('OS=')
      name.split('OS=', 2).last.split(' GN=').first.to_s
    elsif name.start_with?('sp=', 'tr=')
      # This must be a comparison. The previous code assigned
      # (name[0..2] = 'sp='), which overwrote the caller's string and was
      # always truthy, so the fallback branch below could never run.
      match = name.match(/(\w+ \w+) \(([\w ]+)\) \(([\w ]+)\)/) ||
              name.match(/(\w+ \w+) \(([\w ]+)\)/)
      match && match[1]
    else
      # NOTE(review): the previous code also stripped spaces from the text
      # here, which would have glued genus and species together; whitespace
      # clean-up is left to the word split below instead - confirm intent.
      text = name.split(';', 2).last.to_s.split('.', 2).first.to_s
      text.gsub(/\(\D+\)/, '')
    end
  return nil if organism.nil?

  # Keep only the first two words; the split also normalises whitespace.
  organism = organism.split(' ')[0..1].join(' ')
  return nil if organism.empty? # do not count a blank organism name

  taxonomy[organism] = (taxonomy[organism] || 0) + 1
end
|
82
|
-
|
83
|
-
# Builds a fresh statistics hash for the representative-transcriptome
# report, with every counter set to 0 and keys in the order listed.
#
# @return [Hash{String => Integer}] a new hash, one zeroed entry per counter
def initialize_stats_hash_reptrans
  counters = [
    'prot_annotated',
    'est_annotated',
    'coding_>1',
    'coding_>0.94',
    'coding_>0.84',
    'coding_>0.73',
    'coding_>0'
  ]
  counters.each_with_object({}) { |counter, stats| stats[counter] = 0 }
end
|
94
|
-
|
95
|
-
# Adds the counts for one batch of sequences to the running statistics.
#
# Each sequence must respond to type, fasta_length, status, db_name and
# seq_name; sequences typed strictly between UNKNOWN and NCRNA must also
# respond to hit (whose acc is collected). The type constants (FAILED,
# UNKNOWN, COMPLETE, ...) are defined outside this method.
#
# @param seqs [Array] sequences of the batch; may be empty
# @param stats_hash [Hash{String => Integer}] running counters, updated in place
# @param diff_ids_array [Array] distinct hit accessions seen so far, updated in place
# @param diff_ids_complete_array [Array] distinct hit accessions of COMPLETE
#   sequences seen so far, updated in place
# @return [Array] stats_hash, diff_ids_array and diff_ids_complete_array
def summary_stats(seqs, stats_hash, diff_ids_array, diff_ids_complete_array)
  low_limit = 200
  upper_limit = 500
  longer_than = lambda { |list, limit| list.count { |s| s.fasta_length > limit } }
  of_type = lambda { |type| seqs.select { |s| s.type == type } }

  # All seqs
  stats_hash['output_seqs'] += seqs.length
  good_seqs = seqs.select { |s| s.type >= UNKNOWN }
  stats_hash['good_seqs'] += good_seqs.length

  # Longest unigene. max is nil for an empty batch, so guard the comparison
  # instead of calling > on nil.
  current_longest_unigene = seqs.map { |s| s.fasta_length }.max
  if current_longest_unigene && current_longest_unigene > stats_hash['longest_unigene']
    stats_hash['longest_unigene'] = current_longest_unigene
  end

  # Load ids
  seqs.each do |s|
    diff_ids_array << s.hit.acc if s.type > UNKNOWN && s.type < NCRNA
  end
  diff_ids_array.uniq!

  # By length
  stats_hash['sequences_>200'] += longer_than.call(good_seqs, low_limit)
  stats_hash['sequences_>500'] += longer_than.call(good_seqs, upper_limit)

  stats_hash['failed'] += seqs.count { |s| s.type == FAILED }

  # Unknown
  all_unknown = of_type.call(UNKNOWN)
  stats_hash['unknown'] += all_unknown.length
  stats_hash['unknown_>200'] += longer_than.call(all_unknown, low_limit)
  stats_hash['unknown_>500'] += longer_than.call(all_unknown, upper_limit)

  # Artifacts
  stats_hash['artifacts'] += seqs.count { |s| s.type < UNKNOWN && s.type > FAILED }
  stats_hash['misassembled'] += seqs.count { |s| s.type == MISASSEMBLED }
  # Pieces of a split chimera carry '_split_' in their name and are skipped
  # so that a multiple chimera is not counted more than once.
  stats_hash['chimeras'] += seqs.count { |s| s.type == CHIMERA && !s.seq_name.include?('_split_') }
  stats_hash['other_artifacts'] += seqs.count { |s| s.type == OTHER }

  # Annotated with prot
  prot_annotated = seqs.select { |s| s.type >= COMPLETE && s.type <= INTERNAL }
  stats_hash['prot_annotated'] += prot_annotated.length

  # By annotation
  stats_hash['internal'] += seqs.count { |s| s.type == INTERNAL }
  complete = of_type.call(COMPLETE)
  n_terminal = of_type.call(N_TERMINAL)
  c_terminal = of_type.call(C_TERMINAL)

  stats_hash['complete'] += complete.length
  stats_hash['n_terminal'] += n_terminal.length
  stats_hash['c_terminal'] += c_terminal.length

  # Load complete ids
  complete.each { |s| diff_ids_complete_array << s.hit.acc }
  diff_ids_complete_array.uniq!

  # By status: a truthy status counts as "sure", a falsy one as "putative"
  { 'complete' => complete, 'n_terminal' => n_terminal, 'c_terminal' => c_terminal }.each do |label, group|
    sure = group.count { |s| s.status }
    stats_hash["#{label}_sure"] += sure
    stats_hash["#{label}_putative"] += group.length - sure
  end

  # By database: anything that is neither sp_* nor tr_* counts as user DB
  swissprot = prot_annotated.count { |s| s.db_name =~ /^sp_/ }
  trembl = prot_annotated.count { |s| s.db_name =~ /^tr_/ }
  stats_hash['swissprot'] += swissprot
  stats_hash['trembl'] += trembl
  stats_hash['userdb'] += prot_annotated.length - swissprot - trembl

  # ncRNA
  stats_hash['ncrna'] += seqs.count { |s| s.type == NCRNA }

  # Coding sequences
  coding = of_type.call(CODING)
  stats_hash['coding'] += coding.length
  coding_sure = coding.count { |s| s.status }
  stats_hash['coding_sure'] += coding_sure
  stats_hash['coding_putative'] += coding.length - coding_sure
  stats_hash['coding_>200'] += longer_than.call(coding, low_limit)
  stats_hash['coding_>500'] += longer_than.call(coding, upper_limit)

  return stats_hash, diff_ids_array, diff_ids_complete_array
end
|
193
|
-
|
194
|
-
# Fills in the counters that can only be computed once every batch has been
# summarised: the number of distinct orthologue ids, the number of distinct
# complete ids and, when all of its inputs are positive, the BA index.
#
# @param stats_hash [Hash{String => Numeric}] accumulated counters, updated in place
# @param diff_ids_array [Array] distinct orthologue ids
# @param diff_ids_complete_array [Array] distinct ids of complete sequences
# @return [Hash] the same stats_hash
def last_stats(stats_hash, diff_ids_array, diff_ids_complete_array)
  stats_hash['different_orthologues'] = diff_ids_array.length
  stats_hash['different_completes'] = diff_ids_complete_array.length

  # The BA index is left untouched unless every term it uses is positive.
  required = ['prot_annotated', 'complete', 'sequences_>500', 'different_orthologues', 'different_completes']
  return stats_hash unless required.all? { |counter| stats_hash[counter] > 0 }

  annotated = stats_hash['prot_annotated']
  completes = stats_hash['complete']
  annotation_coef = (annotated * completes * 1.0) / (stats_hash['sequences_>500'] * 10000)
  improvement_coef = (stats_hash['different_orthologues'] * 1.0 + stats_hash['different_completes']) / (annotated + completes)
  stats_hash['BA_index'] = Math.sqrt(annotation_coef * improvement_coef)
  stats_hash
end
|
210
|
-
|
211
|
-
# Increments the test-code bin that matches a coding sequence.
#
# Bins are tried from the highest threshold down and the first one whose
# threshold is strictly below coding_seq.t_code wins. Nothing is counted
# when t_code is not above 0.
#
# @param coding_seq [#t_code] sequence exposing its test-code score
# @param stats_hash [Hash{String => Integer}] reptrans counters, updated in place
# @return [Integer, nil] the new value of the incremented bin, or nil
def coding_stats_reptrans(coding_seq, stats_hash)
  bins = [
    [1, 'coding_>1'],
    [0.95, 'coding_>0.94'],
    [0.85, 'coding_>0.84'],
    [0.73, 'coding_>0.73'],
    [0, 'coding_>0']
  ]
  _threshold, group = bins.find { |threshold, _label| coding_seq.t_code > threshold }
  return nil if group.nil?

  stats_hash[group] += 1
end
|
228
|
-
|
229
|
-
# Finalises the statistics with last_stats and writes both summary reports.
#
# @param stats_hash [Hash] accumulated counters
# @param stats_taxonomy [Hash] organism counters handed to write_html
# @param diff_ids_array [Array] distinct orthologue ids
# @param diff_ids_complete_array [Array] distinct ids of complete sequences
# @param txt_file [#puts] destination of the text report
# @param html_file [File] destination of the HTML report
def write_summary_stats(stats_hash, stats_taxonomy, diff_ids_array, diff_ids_complete_array, txt_file, html_file)
  final_stats = last_stats(stats_hash, diff_ids_array, diff_ids_complete_array)
  write_txt(final_stats, txt_file)
  write_html(final_stats, html_file, stats_taxonomy)
end
|
234
|
-
|
235
|
-
# Writes the representative-transcriptome statistics as a text file and as
# an HTML file.
#
# Both files are opened with the block form of File.open so they are
# flushed and closed even if one of the writers raises; previously the
# handles were never closed.
#
# @param stats_hash [Hash] reptrans counters
# @param html_file [String] path of the HTML report to create
# @param txt_file [String] path of the text report to create
def write_reptrans_stats(stats_hash, html_file, txt_file)
  File.open(html_file, 'w') do |html|
    File.open(txt_file, 'w') do |txt|
      write_txt(stats_hash, txt)
      write_html_reptrans(stats_hash, html)
    end
  end
end
|
241
|
-
|
242
|
-
# Writes the complete representative-transcriptome HTML document: the
# <html> wrapper around the output of header and body_reptrans.
#
# @param stats_hash [Hash] reptrans counters
# @param html_file [#puts] open destination of the report
def write_html_reptrans(stats_hash, html_file)
  html_file.puts('<html>')
  header(html_file)
  body_reptrans(html_file, stats_hash)
  html_file.puts('</html>')
end
|
248
|
-
|
249
|
-
# Dumps a statistics hash as text, one "value<TAB>key" line per entry, in
# the iteration order of the hash.
#
# @param stats_hash [Hash] counters to dump
# @param file [#puts] open destination
# @return [Hash] stats_hash
def write_txt(stats_hash, file)
  stats_hash.each_pair do |counter, count|
    file.puts("#{count}\t#{counter}")
  end
end
|
254
|
-
|
255
|
-
# Writes the general HTML summary and makes sure the bundled
# 'expresscanvas' directory sits next to it.
#
# When that directory is missing, expresscanvas.zip (two levels above this
# file) is unpacked beside the report with the external unzip command.
#
# @param stats_hash [Hash] final counters
# @param html_file [File] open report file; must respond to to_path and puts
# @param stats_taxonomy [Hash] organism counters for the taxonomy chart
def write_html(stats_hash, html_file, stats_taxonomy)
  js_path = File.dirname(html_file.to_path)
  # File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+.
  unless File.exist?(File.join(js_path, 'expresscanvas'))
    archive = File.join(File.dirname(__FILE__), '..', '..', 'expresscanvas.zip')
    # Argument-list form: no shell is involved, so paths containing spaces
    # or shell metacharacters are passed through intact.
    system('unzip', '-qq', archive, '-d', js_path)
  end
  html_file.puts '<html>'
  html_header(html_file, stats_hash, stats_taxonomy)
  body(html_file, stats_hash)
  html_file.puts '</html>'
end
|
263
|
-
|
264
|
-
# Writes the minimal <head> section (title only), one tag per line.
#
# @param html_file [#puts] open destination
# @return [nil]
def header(html_file)
  head_lines = ['<head>', '<title>FLN Summary</title>', '</head>']
  html_file.puts(*head_lines)
end
|
269
|
-
|
270
|
-
# Writes the <head> section of the general summary, including the
# JavaScript function showDemo that draws the two CanvasXpress charts:
# the structural profile ('profile' canvas) and the taxonomy distribution
# ('taxonomy' canvas).
#
# Changes with respect to the previous implementation:
# * organism names are escaped before being placed inside single-quoted
#   JavaScript strings, so a name containing ' or \ no longer breaks the
#   generated script;
# * the "Sure" series of the Coding column uses 'coding_sure'. It used the
#   total 'coding', which double-counted the putative coding sequences once
#   stacked with the "Putative" series;
# * percentages are 0.0 when 'good_seqs' is 0 instead of NaN/Infinity.
#
# @param html_file [#puts] open destination
# @param stats_hash [Hash{String => Numeric}] final counters
# @param stats_taxonomy [Hash{String => Integer}] organism => number of annotations
def html_header(html_file, stats_hash, stats_taxonomy)
  total = stats_hash['good_seqs']
  to_percent = lambda { |count| total.zero? ? 0.0 : count * 100.0 / total }
  js_array = lambda { |values| "[#{values.join(', ')}]" }
  # Escape backslashes and single quotes for a single-quoted JS literal.
  escape_js = lambda { |text| text.to_s.gsub(/[\\']/) { |char| "\\#{char}" } }

  # One value per chart column, in the order of the 'smps' labels below.
  structural_data_sure = [
    stats_hash['unknown'],
    stats_hash['complete_sure'],
    stats_hash['n_terminal_sure'],
    stats_hash['c_terminal_sure'],
    stats_hash['internal'],
    stats_hash['ncrna'],
    stats_hash['coding_sure']
  ]
  structural_data_putative = [
    0,
    stats_hash['complete_putative'],
    stats_hash['n_terminal_putative'],
    stats_hash['c_terminal_putative'],
    0,
    0,
    stats_hash['coding_putative']
  ]
  values_structural_sure = js_array.call(structural_data_sure.map(&to_percent))
  values_structural_putative = js_array.call(structural_data_putative.map(&to_percent))

  # The 21 organisms with the most annotations, most frequent first.
  data = stats_taxonomy.to_a.sort { |a, b| b.last <=> a.last }[0..20]
  smps_taxonomy = "['#{data.map { |tax| escape_js.call(tax.first) }.join("', '")}']"
  values_taxonomy = js_array.call(data.map { |tax| tax.last })

  html_file.puts '<head>
<title>FLN Summary</title>
<meta http-equiv="CACHE-CONTROL" CONTENT="NO-CACHE">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />

<!--[if lt IE 9]><script type="text/javascript" src="./expresscanvas/js/flashcanvas.js"></script><![endif]-->
<script type="text/javascript" src="./expresscanvas/js/canvasXpress.min.js"></script>

<script id=\'demoScript\'>
var showDemo = function () {'

  html_file.puts "new CanvasXpress('profile',
{
'y' : {
'vars' : ['Sure', 'Putative'],
'smps' : ['Unknown', 'Complete', 'N-terminal', 'C-terminal', 'Internal', 'ncrna', 'Coding'],
'data' : [#{values_structural_sure},
#{values_structural_putative}],
},
'a' : {
'xAxis' : ['Sure', 'Putative']
},
},

{'gradient': false,
'toolbarPermanent': true,
'graphOrientation': 'vertical',
'graphType': 'Stacked',
'legendBackgroundColor': false,
'smpLabelScaleFontFactor': 0.8,
'xAxisTitle': '% sequences',
'xAxis2Show': false,
'xAxisExact': true,
'setMaxX': 80,
'setMinX': 0,
'axisTitleScaleFontFactor': 2,
'smpTitleFontStyle': 'italic',
'titleHeight': 60
}
);

new CanvasXpress('taxonomy',
{
'y' : {
'vars' : ['Annotations'],
'smps' : #{smps_taxonomy},
'data' : [#{values_taxonomy}],
},
'a' : {
'xAxis' : ['Sure', 'Putative']
},
},

{'gradient': false,
'toolbarPermanent': true,
'graphOrientation': 'horizontal',
'showLegend': false,
'smpLabelScaleFontFactor': 1.5,
'xAxisTitle': 'Number of sequences',
'xAxis2Show': false,
'titleHeight': 60
}
);
}
</script>
</head>"
end
|
366
|
-
|
367
|
-
# Writes the <body> of the representative-transcriptome report: a title
# followed by the per-category table (floated left) and the cumulative
# table (floated right).
#
# The second table is now given 'right'; the previous value 'rigth'
# produced the invalid CSS declaration "float:rigth".
#
# @param html_file [#puts] open destination
# @param stats_hash [Hash] reptrans counters
def body_reptrans(html_file, stats_hash)
  html_file.puts '<body bgcolor="#FFFFFF" >', '<center>' # Start body
  html_file.puts '<div style="float:center; font-size:30; margin:10px"><b>', 'Full-LengtherNEXT Representative Transcriptome Summary', '</b></div>'
  # TABLES
  html_file.puts '<div style=" width: 850px; height: 350px; padding: 10 ">'
  reptrans_report(html_file, stats_hash, 'left')
  reptrans_acumulative_report(html_file, stats_hash, 'right')
  html_file.puts '</div>'
  # END TABLES
  html_file.puts '</center>', '</body>' # End body
end
|
378
|
-
|
379
|
-
# Writes the <body> of the general summary: a title followed by three
# rows, each pairing a left-floated and a right-floated panel
# (general info / assembly report, structural chart / status report,
# taxonomy chart / database report).
#
# The status and database reports are now given 'right'; the previous
# value 'rigth' produced the invalid CSS declaration "float:rigth".
#
# @param html_file [#puts] open destination
# @param stats_hash [Hash] final counters
def body(html_file, stats_hash)
  panel_open = lambda do |height|
    html_file.puts '<div style="overflow: hidden; width: 950px; height: ' + height.to_s + 'px; padding: 10 ">'
  end

  html_file.puts '<body bgcolor="#FFFFFF" onload="showDemo(); id=demo">', '<center>' # Start body
  html_file.puts '<div style="float:center; font-size:30; margin:10px"><b>', 'Full-LengtherNEXT Summary', '</b></div>'

  # TABLES
  panel_open.call(550)
  general_report(html_file, stats_hash, 'left')
  assembly_report(html_file, stats_hash, 'right')
  html_file.puts '</div>'

  panel_open.call(550)
  status_graph(html_file, 'left')
  status_report(html_file, stats_hash, 'right')
  html_file.puts '</div>'

  panel_open.call(750)
  taxonomy_graph(html_file, 'left')
  database_report(html_file, stats_hash, 'right')
  html_file.puts '</div>'
  # END TABLES

  html_file.puts '</center>', '</body>' # End body
end
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
# Writes the 'Sequences info' table of the reptrans report: one row per
# category, with percentages relative to the sum of all counters.
#
# @param html_file [#puts] open destination
# @param stats_hash [Hash{String => Integer}] reptrans counters
# @param align [String] CSS float value for the enclosing div
def reptrans_report(html_file, stats_hash, align)
  all_seqs = stats_hash.values.inject(0) { |sum, count| sum + count }
  rows = [
    ['Output', all_seqs],
    ['Annotated with protein', stats_hash['prot_annotated']],
    ['Annotated with EST', stats_hash['est_annotated']],
    ['Coding test-code > 1', stats_hash['coding_>1']],
    ['Coding test-code > 0.94', stats_hash['coding_>0.94']],
    ['Coding test-code > 0.84', stats_hash['coding_>0.84']],
    ['Coding test-code > 0.73', stats_hash['coding_>0.73']],
    ['Coding test-code > 0', stats_hash['coding_>0']]
  ]

  html = ['<div style=" margin: 0; float:' + align + '">', table_title('Sequences info')]
  html.concat(table_header(['', 'Sequences', '%'], 0))
  rows.each { |label, count| html.concat(single_row(label, count, all_seqs)) }
  html << '</table>' << '</div>'
  write_array_html(html, html_file)
end
|
421
|
-
|
422
|
-
# Writes the cumulative reptrans table: each row shows its own counter
# plus every counter of the rows above it, with percentages relative to
# the sum of all counters. 'coding_>0' has no row in this table.
#
# @param html_file [#puts] open destination
# @param stats_hash [Hash{String => Integer}] reptrans counters
# @param align [String] CSS float value for the enclosing div
def reptrans_acumulative_report(html_file, stats_hash, align)
  all_seqs = stats_hash.values.inject(0) { |sum, count| sum + count }
  steps = [
    ['Annotated with protein', 'prot_annotated'],
    ['Annotated with EST', 'est_annotated'],
    ['Coding test-code > 1', 'coding_>1'],
    ['Coding test-code > 0.94', 'coding_>0.94'],
    ['Coding test-code > 0.84', 'coding_>0.84'],
    ['Coding test-code > 0.73', 'coding_>0.73']
  ]

  html = ['<div style=" margin: 0; float:' + align + '">', table_title('Sequences summary (Acumulative)')]
  html.concat(table_header(['', 'Sequences', '%'], 0))
  running_total = 0
  steps.each do |label, counter|
    running_total += stats_hash[counter]
    html.concat(single_row(label, running_total, all_seqs))
  end
  html << '</table>' << '</div>'
  write_array_html(html, html_file)
end
|
445
|
-
|
446
|
-
# Writes the 'General info' table: input, failing sequences, artifacts and
# their breakdown, and the BA index when it is greater than 0.
#
# The rows that break down the artifacts are percentages of the artifact
# count and are flagged as sub-rows with the literal true; the former TRUE
# constant is no longer defined on current Ruby versions.
#
# @param html_file [#puts] open destination
# @param stats_hash [Hash] final counters
# @param align [String] CSS float value for the enclosing div
def general_report(html_file, stats_hash, align)
  html = []
  html << '<div style="margin: 0; float:' + align + '">'
  html << table_title('General info')
  html.concat(table_header(['', 'Sequences', '%'], 0))
  html.concat(single_row('Input', stats_hash['input_seqs'], stats_hash['input_seqs']))
  html.concat(single_row('Failing sequences', stats_hash['failed'], stats_hash['output_seqs']))
  html.concat(single_row('Artifacts <sup>1</sup>', stats_hash['artifacts'], stats_hash['output_seqs']))
  html.concat(single_row('Misassembled', stats_hash['misassembled'], stats_hash['artifacts'], true))
  html.concat(single_row('Chimeras', stats_hash['chimeras'], stats_hash['artifacts'], true))
  html.concat(single_row('Other', stats_hash['other_artifacts'], stats_hash['artifacts'], true))
  html.concat(single_row('Sequences with resolved chimeras', stats_hash['output_seqs'], stats_hash['input_seqs']))
  html.concat(single_row('Sequences without artifacts', stats_hash['good_seqs'], stats_hash['output_seqs']))
  # The BA index is a plain number, so it is shown without a percentage.
  html.concat(single_row('BA index', "%5.2f" % [stats_hash['BA_index']], nil)) if stats_hash['BA_index'] > 0
  html << '</table>'
  html << '</div>'
  write_array_html(html, html_file)
end
|
464
|
-
|
465
|
-
# Writes the panel holding the 'taxonomy' canvas: a floated div with a
# title and a one-cell table around the canvas element.
#
# @param html_file [#puts] open destination
# @param align [String] CSS float value for the enclosing div
# @return [nil]
def taxonomy_graph(html_file, align)
  canvas_table = [
    '<table >',
    '<tr>',
    '<td>',
    "<canvas id='taxonomy' width='540' height='640'></canvas>",
    '</td>',
    '</tr>',
    '</table>',
    '</div>'
  ]
  html_file.puts("<div style='float:" + align + "'>")
  html_file.puts(table_title('Taxonomy distribution on annotations'))
  html_file.puts(canvas_table.join("\n"))
end
|
478
|
-
|
479
|
-
|
480
|
-
# Writes the 'Database usage' table: how many good sequences were
# annotated with each database, with percentages relative to 'good_seqs'.
#
# The div style now separates "margin: 0" and "float:" with a semicolon;
# without it the two declarations ran together and the style was invalid.
#
# @param html_file [#puts] open destination
# @param stats_hash [Hash] final counters
# @param align [String] CSS float value for the enclosing div
def database_report(html_file, stats_hash, align)
  total = stats_hash['good_seqs']
  rows = [
    ['UserDB', stats_hash['userdb']],
    ['SwissProt', stats_hash['swissprot']],
    ['TrEMBL', stats_hash['trembl']],
    ['ncRNA', stats_hash['ncrna']],
    ['None', stats_hash['coding'] + stats_hash['unknown']],
    ['Total', total]
  ]

  html = []
  html << '<div style=" margin: 0; float:' + align + '">'
  html << table_title('Database usage')
  html.concat(table_header(['', 'Unigenes', '%'], 0))
  rows.each { |label, count| html.concat(single_row(label, count, total)) }
  html << '</table>'
  html << '</div>'
  write_array_html(html, html_file)
end
|
495
|
-
|
496
|
-
# Writes the 'Report guiding assembly quality' table.
#
# Rows marked with <sup>1</sup> are the 100% reference for the sub-rows
# that follow them. Sub-rows are flagged with the literal true; the former
# TRUE constant is no longer defined on current Ruby versions.
#
# @param html_file [#puts] open destination
# @param stats_hash [Hash] final counters
# @param align [String] CSS float value for the enclosing div
def assembly_report(html_file, stats_hash, align)
  good_seqs = stats_hash['good_seqs']
  with_orthologue = stats_hash['prot_annotated']
  without_orthologue = stats_hash['coding'] + stats_hash['unknown']

  html = []
  html << '<div style=" margin: 0; float:' + align + '">'
  html << table_title('Report guiding assembly quality')
  html.concat(table_header(['', 'Unigenes', '%'], 0))
  html.concat(single_row('Unigenes', good_seqs, good_seqs))
  html.concat(single_row('Unigenes >500pb', stats_hash['sequences_>500'], good_seqs))
  html.concat(single_row('Unigenes >200pb', stats_hash['sequences_>200'], good_seqs))
  html.concat(single_row('Longest unigene', stats_hash['longest_unigene'], nil))
  html.concat(single_row('With orthologue <sup>1</sup>', with_orthologue, good_seqs))
  html.concat(single_row('Different orthologue IDs', stats_hash['different_orthologues'], with_orthologue, true))
  html.concat(single_row('Complete transcripts', stats_hash['complete'], with_orthologue, true))
  html.concat(single_row('Different complete transcripts ', stats_hash['different_completes'], with_orthologue, true))
  html.concat(single_row('ncRNA', stats_hash['ncrna'], good_seqs))
  html.concat(single_row('Without orthologue <sup>1</sup>', without_orthologue, good_seqs))
  html.concat(single_row('Coding (all)', stats_hash['coding'], without_orthologue, true))
  html.concat(single_row('Coding > 200bp', stats_hash['coding_>200'], without_orthologue, true))
  html.concat(single_row('Coding > 500bp', stats_hash['coding_>500'], without_orthologue, true))
  html.concat(single_row('Unknown (all)', stats_hash['unknown'], without_orthologue, true))
  html.concat(single_row('Unknown > 200bp', stats_hash['unknown_>200'], without_orthologue, true))
  html.concat(single_row('Unknown > 500bp', stats_hash['unknown_>500'], without_orthologue, true))
  html << '</table>'
  html << '<sup>1</sup> Percents for subclassifications of this category <br> were calculated using this line as 100% reference.'
  html << '</div>'
  write_array_html(html, html_file)
end
|
523
|
-
|
524
|
-
# Writes the panel holding the 'profile' canvas: a floated div with a
# title and a one-cell table around the canvas element.
#
# @param html_file [#puts] open destination
# @param align [String] CSS float value for the enclosing div
# @return [nil]
def status_graph(html_file, align)
  canvas_table = [
    '<table >',
    '<tr>',
    '<td>',
    "<canvas id='profile' width='500' height='440'></canvas>",
    '</td>',
    '</tr>',
    '</table>',
    '</div>'
  ]
  html_file.puts("<div style='float:" + align + "'>")
  html_file.puts(table_title('Structural profile'))
  html_file.puts(canvas_table.join("\n"))
end
|
537
|
-
|
538
|
-
# Writes the 'Status report' table. Complete, C-terminus, N-terminus and
# Coding get a Sure/Putative pair of rows; Internal, ncRNA, Unknown and
# Total get a single row. Percentages are relative to 'good_seqs'.
#
# @param html_file [#puts] open destination
# @param stats_hash [Hash] final counters
# @param align [String] CSS float value for the enclosing div
def status_report(html_file, stats_hash, align)
  total = stats_hash['good_seqs']
  paired = lambda do |label, counter|
    fused_row(label, stats_hash["#{counter}_sure"], stats_hash["#{counter}_putative"], total)
  end
  plain = lambda { |label, count| composed_single_row(label, count, total) }

  html = ['<div style=" margin: 0; float:' + align + '">', table_title('Status report')]
  html.concat(table_header(['Status', 'Unigenes', '%'], 2))
  html.concat(paired.call('Complete', 'complete'))
  html.concat(paired.call('C-terminus', 'c_terminal'))
  html.concat(paired.call('N-terminus', 'n_terminal'))
  html.concat(plain.call('Internal', stats_hash['internal']))
  html.concat(paired.call('Coding', 'coding'))
  html.concat(plain.call('ncRNA', stats_hash['ncrna']))
  html.concat(plain.call('Unknown', stats_hash['unknown']))
  html.concat(plain.call('Total', total))
  html << '</table>' << '</div>'
  write_array_html(html, html_file)
end
|
555
|
-
|
556
|
-
|
557
|
-
# Wraps a table title in the bold 25px div used above every report table.
#
# @param title [String] title text, inserted as is
# @return [String] the HTML fragment
def table_title(title)
  opening = '<div style="font-size:25px; margin: 10"><b>'
  opening + title + '</b></div>'
end
|
561
|
-
|
562
|
-
# Builds the opening <table> tag and the header row of a report table.
#
# @param col_array [Array<String>] header labels, left to right
# @param colspan [Integer] colspan given to the first header cell when it
#   is greater than 0
# @return [Array<String>] HTML lines; the table itself is left open
def table_header(col_array, colspan)
  cells = col_array.each_with_index.map do |label, index|
    opening = index == 0 && colspan > 0 ? '<th colspan="' + colspan.to_s + '">' : '<th>'
    opening + label + '</th>'
  end
  ['<table border="2" cellspacing="0" cellpadding="2">', '<tr>'] + cells + ['</tr>']
end
|
578
|
-
|
579
|
-
# Builds one table row: a left-aligned label followed by the value and
# percentage cells produced by sub_row.
#
# The default for +space+ is the literal false; the former FALSE constant
# is no longer defined on current Ruby versions, which made every call
# relying on the default raise NameError.
#
# @param name [String] row label
# @param magnitude [Numeric, String] value shown in the second cell
# @param total [Numeric, nil] 100% reference; nil shows '-' as percentage
# @param space [Boolean] when truthy, the label is prefixed with a space to
#   mark the row as a sub-row
# @return [Array<String>] HTML lines of the row
def single_row(name, magnitude, total, space = false)
  label = space ? ' ' + name : name
  html = ['<tr>', '<td align="left">' + label + '</td>']
  html.concat(sub_row(magnitude, total))
  html << '</tr>'
end
|
590
|
-
|
591
|
-
|
592
|
-
# Builds the pair of rows used for categories that have a Sure and a
# Putative count: the category label spans both rows.
#
# The first row is now opened with '<tr>'. It used to start directly with
# the label cell, leaving its closing '</tr>' unmatched.
#
# @param type [String] category label
# @param sure_magnitude [Numeric] count of sure sequences
# @param putative_magnitude [Numeric] count of putative sequences
# @param total [Numeric, nil] 100% reference for both percentages
# @return [Array<String>] HTML lines of the two rows
def fused_row(type, sure_magnitude, putative_magnitude, total)
  html = []
  html << '<tr>'
  html << '<td rowspan="2" align="left">' + type + '</td>'
  html << seq_status('Sure')
  html.concat(sub_row(sure_magnitude, total))
  html << '</tr>'
  html << '<tr>'
  html << seq_status('Putative')
  html.concat(sub_row(putative_magnitude, total))
  html << '</tr>'
  html
end
|
604
|
-
|
605
|
-
# Builds the left-aligned cell that holds a status label.
#
# @param status [String] label, inserted as is
# @return [String] the HTML cell
def seq_status(status)
  ['<td align="left">', '</td>'].join(status)
end
|
609
|
-
|
610
|
-
# Builds the value cell and the percentage cell of a table row.
#
# The percentage is magnitude * 100 / total with two decimals. It is shown
# as '-' when there is no reference (total is nil) or when the result is
# not a finite number: 0/0 gives NaN and n/0 gives Infinity. The Infinity
# case used to raise ArgumentError because the value was turned into the
# string "Infinity" before being formatted with '%.2f'.
#
# @param magnitude [Numeric, String] value to show; only used in the
#   percentage calculation when total is not nil
# @param total [Numeric, nil] 100% reference
# @return [Array<String>] the two right-aligned HTML cells
def sub_row(magnitude, total)
  percentage = '-'
  unless total.nil?
    perc_float = magnitude * 100.0 / total
    percentage = format('%.2f%%', perc_float) if perc_float.finite?
  end
  [
    '<td align="right">' + magnitude.to_s + '</td>',
    '<td align="right">' + percentage + '</td>'
  ]
end
|
627
|
-
|
628
|
-
# Builds a single row whose label spans two columns, for tables whose
# other rows carry a separate status column.
#
# @param type [String] row label
# @param magnitude [Numeric] value shown in the row
# @param total [Numeric, nil] 100% reference for the percentage
# @return [Array<String>] HTML lines of the row
def composed_single_row(type, magnitude, total)
  label_cell = '<td colspan="2" align="left">' + type + '</td>'
  ['<tr>', label_cell] + sub_row(magnitude, total) + ['</tr>']
end
|
636
|
-
|
637
|
-
|
638
|
-
# Writes every element of +html+ to +html_file+ with puts, in order.
#
# @param html [Array<String>] HTML lines
# @param html_file [#puts] open destination
# @return [Array] the per-line results of puts
def write_array_html(html, html_file)
  html.map do |row|
    html_file.puts(row)
  end
end
|
641
|
-
end
|