full_lengther_next 0.6.2 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +5 -0
  5. data/CODE_OF_CONDUCT.md +49 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +21 -0
  8. data/{README.rdoc → README.md} +0 -0
  9. data/Rakefile +6 -37
  10. data/bin/console +14 -0
  11. data/bin/download_fln_dbs.rb +2 -7
  12. data/bin/full_lengther_next +85 -6
  13. data/bin/make_user_db.rb +13 -5
  14. data/bin/setup +8 -0
  15. data/full_lengther_next.gemspec +42 -0
  16. data/lib/full_lengther_next.rb +2 -10
  17. data/lib/full_lengther_next/artifacts.rb +74 -0
  18. data/lib/full_lengther_next/{classes/blast_functions.rb → blast_functions.rb} +0 -0
  19. data/lib/full_lengther_next/{classes/cdhit.rb → cdhit.rb} +0 -0
  20. data/lib/full_lengther_next/{classes/chimeric_seqs.rb → chimeric_seqs.rb} +0 -0
  21. data/lib/full_lengther_next/{classes/common_functions.rb → common_functions.rb} +0 -0
  22. data/lib/full_lengther_next/{classes/exonerate_result.rb → exonerate_result.rb} +0 -0
  23. data/lib/full_lengther_next/{classes/fl_analysis.rb → fl_analysis.rb} +0 -0
  24. data/lib/full_lengther_next/{classes/fl_string_utils.rb → fl_string_utils.rb} +0 -0
  25. data/lib/full_lengther_next/fln_stats.rb +613 -0
  26. data/lib/full_lengther_next/go_methods.rb +42 -0
  27. data/lib/full_lengther_next/{classes/handle_db.rb → handle_db.rb} +0 -0
  28. data/lib/full_lengther_next/mapping.rb +296 -0
  29. data/lib/full_lengther_next/{classes/my_worker.rb → my_worker.rb} +71 -9
  30. data/lib/full_lengther_next/{classes/my_worker_EST.rb → my_worker_EST.rb} +0 -0
  31. data/lib/full_lengther_next/{classes/my_worker_manager_EST.rb → my_worker_manager_EST.rb} +0 -0
  32. data/lib/full_lengther_next/{classes/my_worker_manager_fln.rb → my_worker_manager_fln.rb} +181 -16
  33. data/lib/full_lengther_next/{classes/nc_rna.rb → nc_rna.rb} +0 -0
  34. data/lib/full_lengther_next/{classes/orf.rb → orf.rb} +0 -0
  35. data/lib/full_lengther_next/{classes/reptrans.rb → reptrans.rb} +9 -5
  36. data/lib/full_lengther_next/{classes/sequence.rb → sequence.rb} +26 -1
  37. data/lib/full_lengther_next/{classes/test_code.rb → test_code.rb} +1 -1
  38. data/lib/full_lengther_next/{classes/types.rb → types.rb} +3 -2
  39. data/lib/full_lengther_next/{classes/une_los_hit.rb → une_los_hit.rb} +0 -0
  40. data/lib/full_lengther_next/version.rb +3 -0
  41. data/lib/full_lengther_next/{classes/warnings.rb → warnings.rb} +0 -0
  42. data/report_templates/general_summary.erb +140 -0
  43. data/report_templates/mapping_summary.erb +98 -0
  44. data/report_templates/reptrans_summary.erb +32 -0
  45. metadata +112 -134
  46. data/.gemtest +0 -0
  47. data/History.txt +0 -32
  48. data/Manifest.txt +0 -44
  49. data/PostInstall.txt +0 -6
  50. data/bin/plot_fln.rb +0 -270
  51. data/bin/plot_taxonomy.rb +0 -70
  52. data/lib/expresscanvas.zip +0 -0
  53. data/lib/full_lengther_next/classes/artifacts.rb +0 -66
  54. data/lib/full_lengther_next/classes/fln_stats.rb +0 -641
  55. data/script/console +0 -10
  56. data/script/destroy +0 -14
  57. data/script/generate +0 -14
  58. data/test/test_full_lengther_next.rb +0 -11
  59. data/test/test_helper.rb +0 -3
@@ -1,641 +0,0 @@
1
- require 'types.rb'
2
-
3
- module FlnStats
4
- def initialize_stats_hash
5
- stats_hash = {}
6
- stats_hash['input_seqs'] = 0
7
- stats_hash['output_seqs'] = 0
8
- stats_hash['failed'] = 0
9
- stats_hash['sequences_>200'] = 0
10
- stats_hash['sequences_>500'] = 0
11
- stats_hash['longest_unigene'] = 0
12
- stats_hash['good_seqs'] = 0
13
- stats_hash['artifacts'] = 0
14
- stats_hash['misassembled'] = 0
15
- stats_hash['chimeras'] = 0
16
- stats_hash['other_artifacts'] = 0
17
- stats_hash['unknown'] = 0
18
- stats_hash['unknown_>200'] = 0
19
- stats_hash['unknown_>500'] = 0
20
- stats_hash['prot_annotated'] = 0
21
- stats_hash['complete'] = 0
22
- stats_hash['complete_sure'] = 0
23
- stats_hash['complete_putative'] = 0
24
- stats_hash['n_terminal'] = 0
25
- stats_hash['n_terminal_sure'] = 0
26
- stats_hash['n_terminal_putative'] = 0
27
- stats_hash['c_terminal'] = 0
28
- stats_hash['c_terminal_sure'] = 0
29
- stats_hash['c_terminal_putative'] = 0
30
- stats_hash['internal'] = 0
31
- stats_hash['swissprot'] = 0
32
- stats_hash['trembl'] = 0
33
- stats_hash['userdb'] = 0
34
- stats_hash['ncrna'] = 0
35
- stats_hash['coding'] = 0
36
- stats_hash['coding_sure'] = 0
37
- stats_hash['coding_putative'] = 0
38
- stats_hash['coding_>200'] = 0
39
- stats_hash['coding_>500'] = 0
40
- stats_hash['different_orthologues'] = 0
41
- stats_hash['different_completes'] = 0
42
- stats_hash['BA_index'] = 0
43
-
44
- return stats_hash
45
- end
46
-
47
- def get_taxonomy(name, taxonomy)
48
- organism = nil
49
- if name.include?('OS=')
50
- fields = name.split('OS=',2)
51
- organism = fields.last.split(' GN=').first.strip
52
- elsif name[0..2] = 'sp=' || name[0..2] = 'tr='
53
- name =~ /(\w+ \w+) \(([\w ]+)\) \(([\w ]+)\)/
54
- if !$1.nil?
55
- organism = $1
56
- else
57
- name =~ /(\w+ \w+) \(([\w ]+)\)/
58
- if !$1.nil?
59
- organism = $1
60
- end
61
- end
62
- else
63
- organism = name.split(";",2).last
64
- organism = organism.split('.', 2).first
65
- organism.gsub!(/\(\D+\)/,'')
66
- if organism.split(' ').length > 1
67
- organism.gsub!('.','')
68
- organism.gsub!(/^ /,'')
69
- organism.gsub!(' ','')
70
- organism.strip!
71
- end
72
- end
73
- if !organism.nil?
74
- organism = organism.split(' ')[0..1].join(' ')
75
- if taxonomy[organism].nil?
76
- taxonomy[organism] = 1
77
- else
78
- taxonomy[organism] += 1
79
- end
80
- end
81
- end
82
-
83
- def initialize_stats_hash_reptrans
84
- stats_hash = {}
85
- stats_hash['prot_annotated'] = 0
86
- stats_hash['est_annotated'] = 0
87
- stats_hash['coding_>1'] = 0
88
- stats_hash['coding_>0.94'] = 0
89
- stats_hash['coding_>0.84'] = 0
90
- stats_hash['coding_>0.73'] = 0
91
- stats_hash['coding_>0'] = 0
92
- return stats_hash
93
- end
94
-
95
- def summary_stats(seqs, stats_hash, diff_ids_array, diff_ids_complete_array)
96
- low_limit = 200
97
- upper_limit = 500
98
- #All seqs
99
- #-----------
100
- stats_hash['output_seqs'] += seqs.length
101
- good_seqs = seqs.select{|s| s.type >= UNKNOWN}
102
- stats_hash['good_seqs'] += good_seqs.length
103
-
104
- #Longest_unigene
105
- current_longest_unigene = seqs.map{|s| s.fasta_length}.max
106
- if current_longest_unigene > stats_hash['longest_unigene']
107
- stats_hash['longest_unigene'] = current_longest_unigene
108
- end
109
-
110
- #Load ids
111
- seqs.map{|s|
112
- if s.type > UNKNOWN && s.type < NCRNA
113
- diff_ids_array << s.hit.acc
114
- end}
115
- diff_ids_array.uniq!
116
-
117
- #By Length
118
- stats_hash['sequences_>200'] += good_seqs.select{|s| s.fasta_length > low_limit}.length
119
- stats_hash['sequences_>500'] += good_seqs.select{|s| s.fasta_length > upper_limit}.length
120
-
121
- stats_hash['failed'] += seqs.select{|s| s.type == FAILED}.length
122
-
123
- #Unknown
124
- #-----------------------------
125
- all_unknown = seqs.select{|s| s.type == UNKNOWN}
126
- stats_hash['unknown'] += all_unknown.length
127
-
128
- #By Length
129
- stats_hash['unknown_>200'] += all_unknown.select{|s| s.fasta_length > low_limit}.length
130
- stats_hash['unknown_>500'] += all_unknown.select{|s| s.fasta_length > upper_limit}.length
131
-
132
- #Artifacts
133
- #----------------
134
- stats_hash['artifacts'] += seqs.select{|s| s.type < UNKNOWN && s.type > FAILED}.length
135
- stats_hash['misassembled'] += seqs.select{|s| s.type == MISASSEMBLED}.length
136
- stats_hash['chimeras'] += seqs.select{|s| s.type == CHIMERA && !s.seq_name.include?('_split_')}.length # We don't want count a multiple chimera
137
- stats_hash['other_artifacts'] += seqs.select{|s| s.type == OTHER}.length
138
-
139
- #Annotated with prot
140
- #---------------------
141
- prot_annotated = seqs.select{|s| s.type >= COMPLETE && s.type <= INTERNAL}
142
- stats_hash['prot_annotated'] += prot_annotated.length
143
-
144
- #By annotation
145
- stats_hash['internal'] += seqs.select{|s| s.type == INTERNAL}.length
146
- complete = seqs.select{|s| s.type == COMPLETE}
147
- n_terminal = seqs.select{|s| s.type == N_TERMINAL}
148
- c_terminal = seqs.select{|s| s.type == C_TERMINAL}
149
-
150
- stats_hash['complete'] += complete.length
151
- stats_hash['n_terminal'] += n_terminal.length
152
- stats_hash['c_terminal'] += c_terminal.length
153
-
154
- #Load complete ids
155
- complete.map{|s| diff_ids_complete_array << s.hit.acc}
156
- diff_ids_complete_array.uniq!
157
-
158
- #----> By Status
159
- stats_hash['complete_sure'] += complete.select{|s| s.status}.length
160
- stats_hash['n_terminal_sure'] += n_terminal.select{|s| s.status}.length
161
- stats_hash['c_terminal_sure'] += c_terminal.select{|s| s.status}.length
162
- stats_hash['complete_putative'] += complete.select{|s| !s.status}.length
163
- stats_hash['n_terminal_putative'] += n_terminal.select{|s| !s.status}.length
164
- stats_hash['c_terminal_putative'] += c_terminal.select{|s| !s.status}.length
165
-
166
- #By database
167
- swissprot = prot_annotated.select{|s| s.db_name =~ /^sp_/}.length
168
- trembl = prot_annotated.select{|s| s.db_name =~ /^tr_/}.length
169
- stats_hash['swissprot'] += swissprot
170
- stats_hash['trembl'] += trembl
171
- stats_hash['userdb'] += prot_annotated.length - swissprot - trembl
172
-
173
- #ncRNA
174
- #----------------
175
- stats_hash['ncrna'] += seqs.select{|s| s.type == NCRNA}.length
176
-
177
- #Coding sequences
178
- #----------------
179
- coding = seqs.select{|s| s.type == CODING}
180
- stats_hash['coding'] += coding.length
181
-
182
- #By Status
183
- stats_hash['coding_sure'] += coding.select{|s| s.status}.length
184
- stats_hash['coding_putative'] += coding.select{|s| !s.status}.length
185
-
186
- #By Length
187
- stats_hash['coding_>200'] += coding.select{|s| s.fasta_length > low_limit}.length
188
- stats_hash['coding_>500'] += coding.select{|s| s.fasta_length > upper_limit}.length
189
-
190
-
191
- return stats_hash, diff_ids_array, diff_ids_complete_array
192
- end
193
-
194
- def last_stats(stats_hash, diff_ids_array, diff_ids_complete_array)
195
- stats_hash['different_orthologues'] = diff_ids_array.length
196
- stats_hash['different_completes'] = diff_ids_complete_array.length
197
- #BA index
198
- if stats_hash['prot_annotated'] > 0 &&
199
- stats_hash['complete'] > 0 &&
200
- stats_hash['sequences_>500'] > 0 &&
201
- stats_hash['different_orthologues'] > 0 &&
202
- stats_hash['different_completes'] > 0
203
- coef_anot_geom = (stats_hash['prot_annotated'] * stats_hash['complete'] * 1.0)/(stats_hash['sequences_>500']*10000)
204
- coef_mejora = (stats_hash['different_orthologues']*1.0 + stats_hash['different_completes'])/(stats_hash['prot_annotated'] + stats_hash['complete'])
205
- stats_hash['BA_index'] = Math.sqrt(coef_anot_geom*coef_mejora)
206
- end
207
-
208
- return stats_hash
209
- end
210
-
211
- def coding_stats_reptrans(coding_seq, stats_hash)
212
- group = nil
213
- if coding_seq.t_code > 1
214
- group = 'coding_>1'
215
- elsif coding_seq.t_code > 0.95
216
- group = 'coding_>0.94'
217
- elsif coding_seq.t_code > 0.85
218
- group = 'coding_>0.84'
219
- elsif coding_seq.t_code > 0.73
220
- group = 'coding_>0.73'
221
- elsif coding_seq.t_code > 0
222
- group = 'coding_>0'
223
- end
224
- if !group.nil?
225
- stats_hash[group] += 1
226
- end
227
- end
228
-
229
- def write_summary_stats(stats_hash, stats_taxonomy, diff_ids_array, diff_ids_complete_array, txt_file, html_file)
230
- stats_hash = last_stats(stats_hash, diff_ids_array, diff_ids_complete_array)
231
- write_txt(stats_hash, txt_file)
232
- write_html(stats_hash, html_file, stats_taxonomy)
233
- end
234
-
235
- def write_reptrans_stats(stats_hash, html_file, txt_file)
236
- html = File.open(html_file,'w')
237
- txt = File.open(txt_file,'w')
238
- write_txt(stats_hash, txt)
239
- write_html_reptrans(stats_hash, html)
240
- end
241
-
242
- def write_html_reptrans(stats_hash, html_file)
243
- html_file.puts '<html>'
244
- header(html_file)
245
- body_reptrans(html_file, stats_hash)
246
- html_file.puts '</html>'
247
- end
248
-
249
- def write_txt(stats_hash, file)
250
- stats_hash.each do |key, value|
251
- file.puts "#{value}\t#{key}"
252
- end
253
- end
254
-
255
- def write_html(stats_hash, html_file, stats_taxonomy)
256
- js_path = File.dirname(html_file.to_path)
257
- system("unzip -qq #{File.join(File.dirname(__FILE__), '..', '..', 'expresscanvas.zip')} -d #{js_path}") if !File.exists?(File.join(js_path, 'expresscanvas'))
258
- html_file.puts '<html>'
259
- html_header(html_file, stats_hash, stats_taxonomy)
260
- body(html_file, stats_hash)
261
- html_file.puts '</html>'
262
- end
263
-
264
- def header(html_file)
265
- html_file.puts '<head>',
266
- '<title>FLN Summary</title>',
267
- '</head>'
268
- end
269
-
270
- def html_header(html_file, stats_hash, stats_taxonomy)
271
- structural_data_sure = []
272
- structural_data_sure << stats_hash['unknown']
273
- structural_data_sure << stats_hash['complete_sure']
274
- structural_data_sure << stats_hash['n_terminal_sure']
275
- structural_data_sure << stats_hash['c_terminal_sure']
276
- structural_data_sure << stats_hash['internal']
277
- structural_data_sure << stats_hash['ncrna']
278
- structural_data_sure << stats_hash['coding']
279
-
280
- structural_data_putative = []
281
- structural_data_putative << 0
282
- structural_data_putative << stats_hash['complete_putative']
283
- structural_data_putative << stats_hash['n_terminal_putative']
284
- structural_data_putative << stats_hash['c_terminal_putative']
285
- structural_data_putative << 0
286
- structural_data_putative << 0
287
- structural_data_putative << stats_hash['coding_putative']
288
-
289
- values_structural_sure = "[#{structural_data_sure.map{|stat| stat*100.0/stats_hash['good_seqs']}.join(', ')}]"
290
- values_structural_putative = "[#{structural_data_putative.map{|stat| stat*100.0/stats_hash['good_seqs']}.join(', ')}]"
291
-
292
- data = stats_taxonomy.to_a.sort{|s2, s1| s1.last <=> s2.last}[0..20]
293
- smps_taxonomy = "['#{data.map{|tax| tax.first}.join("', '")}']"
294
- values_taxonomy = "[#{data.map{|tax| tax.last}.join(', ')}]"
295
-
296
- html_file.puts '<head>
297
- <title>FLN Summary</title>
298
- <meta http-equiv="CACHE-CONTROL" CONTENT="NO-CACHE">
299
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
300
-
301
- <!--[if lt IE 9]><script type="text/javascript" src="./expresscanvas/js/flashcanvas.js"></script><![endif]-->
302
- <script type="text/javascript" src="./expresscanvas/js/canvasXpress.min.js"></script>
303
-
304
- <script id=\'demoScript\'>
305
- var showDemo = function () {'
306
-
307
- #'smpTitle': 'Status',
308
-
309
- html_file.puts "new CanvasXpress('profile',
310
- {
311
- 'y' : {
312
- 'vars' : ['Sure', 'Putative'],
313
- 'smps' : ['Unknown', 'Complete', 'N-terminal', 'C-terminal', 'Internal', 'ncrna', 'Coding'],
314
- 'data' : [#{values_structural_sure},
315
- #{values_structural_putative}],
316
- },
317
- 'a' : {
318
- 'xAxis' : ['Sure', 'Putative']
319
- },
320
- },
321
-
322
- {'gradient': false,
323
- 'toolbarPermanent': true,
324
- 'graphOrientation': 'vertical',
325
- 'graphType': 'Stacked',
326
- 'legendBackgroundColor': false,
327
- 'smpLabelScaleFontFactor': 0.8,
328
- 'xAxisTitle': '% sequences',
329
- 'xAxis2Show': false,
330
- 'xAxisExact': true,
331
- 'setMaxX': 80,
332
- 'setMinX': 0,
333
- 'axisTitleScaleFontFactor': 2,
334
- 'smpTitleFontStyle': 'italic',
335
- 'titleHeight': 60
336
- }
337
- );
338
-
339
- new CanvasXpress('taxonomy',
340
- {
341
- 'y' : {
342
- 'vars' : ['Annotations'],
343
- 'smps' : #{smps_taxonomy},
344
- 'data' : [#{values_taxonomy}],
345
- },
346
- 'a' : {
347
- 'xAxis' : ['Sure', 'Putative']
348
- },
349
- },
350
-
351
- {'gradient': false,
352
- 'toolbarPermanent': true,
353
- 'graphOrientation': 'horizontal',
354
- 'showLegend': false,
355
- 'smpLabelScaleFontFactor': 1.5,
356
- 'xAxisTitle': 'Number of sequences',
357
- 'xAxis2Show': false,
358
- 'titleHeight': 60
359
- }
360
- );
361
- }
362
- </script>
363
- </head>"
364
-
365
- end
366
-
367
- def body_reptrans(html_file, stats_hash)
368
- html_file.puts '<body bgcolor="#FFFFFF" >', '<center>' # Start body
369
- html_file.puts '<div style="float:center; font-size:30; margin:10px"><b>', 'Full-LengtherNEXT Representative Transcriptome Summary', '</b></div>'
370
- # TABLES
371
- html_file.puts '<div style=" width: 850px; height: 350px; padding: 10 ">'
372
- reptrans_report(html_file, stats_hash, 'left')
373
- reptrans_acumulative_report(html_file, stats_hash, 'rigth')
374
- html_file.puts '</div>'
375
- # END TABLES
376
- html_file.puts '</center>', '</body>' # End body
377
- end
378
-
379
- def body(html_file, stats_hash)
380
- html_file.puts '<body bgcolor="#FFFFFF" onload="showDemo(); id=demo">', '<center>' # Start body
381
- html_file.puts '<div style="float:center; font-size:30; margin:10px"><b>', 'Full-LengtherNEXT Summary', '</b></div>'
382
-
383
- # TABLES
384
- html_file.puts '<div style="overflow: hidden; width: 950px; height: 550px; padding: 10 ">'
385
- general_report(html_file, stats_hash, 'left')
386
- assembly_report(html_file, stats_hash, 'right')
387
- html_file.puts '</div>'
388
- html_file.puts '<div style="overflow: hidden; width: 950px; height: 550px; padding: 10 ">'
389
- status_graph(html_file, 'left')
390
- status_report(html_file, stats_hash, 'rigth')
391
- html_file.puts '</div>'
392
- html_file.puts '<div style="overflow: hidden; width: 950px; height: 750px; padding: 10 ">'
393
- taxonomy_graph(html_file, 'left')
394
- database_report(html_file, stats_hash, 'rigth')
395
- html_file.puts '</div>'
396
- # END TABLES
397
- html_file.puts '</center>', '</body>' # End body
398
- end
399
-
400
-
401
-
402
- def reptrans_report(html_file, stats_hash, align)
403
- html = []
404
- all_seqs = 0
405
- stats_hash.values.map{|v| all_seqs += v}
406
- html << '<div style=" margin: 0; float:'+align+'">'
407
- html << table_title('Sequences info')
408
- html.concat(table_header(['', 'Sequences', '%'], 0))
409
- html.concat(single_row('Output', all_seqs, all_seqs))
410
- html.concat(single_row('Annotated with protein', stats_hash['prot_annotated'], all_seqs))
411
- html.concat(single_row('Annotated with EST', stats_hash['est_annotated'], all_seqs))
412
- html.concat(single_row('Coding test-code > 1', stats_hash['coding_>1'], all_seqs))
413
- html.concat(single_row('Coding test-code > 0.94', stats_hash['coding_>0.94'], all_seqs))
414
- html.concat(single_row('Coding test-code > 0.84', stats_hash['coding_>0.84'], all_seqs))
415
- html.concat(single_row('Coding test-code > 0.73', stats_hash['coding_>0.73'], all_seqs))
416
- html.concat(single_row('Coding test-code > 0', stats_hash['coding_>0'], all_seqs))
417
- html << '</table>'
418
- html << '</div>'
419
- write_array_html(html, html_file)
420
- end
421
-
422
- def reptrans_acumulative_report(html_file, stats_hash, align)
423
- html = []
424
- all_seqs = 0
425
- stats_hash.values.map{|v| all_seqs += v}
426
- html << '<div style=" margin: 0; float:'+align+'">'
427
- html << table_title('Sequences summary (Acumulative)')
428
- html.concat(table_header(['', 'Sequences', '%'], 0))
429
- acumulative = 0
430
- html.concat(single_row('Annotated with protein', stats_hash['prot_annotated'], all_seqs))
431
- acumulative += stats_hash['prot_annotated']
432
- html.concat(single_row('Annotated with EST', stats_hash['est_annotated'] + acumulative, all_seqs))
433
- acumulative += stats_hash['est_annotated']
434
- html.concat(single_row('Coding test-code > 1', stats_hash['coding_>1'] + acumulative, all_seqs))
435
- acumulative += stats_hash['coding_>1']
436
- html.concat(single_row('Coding test-code > 0.94', stats_hash['coding_>0.94'] + acumulative, all_seqs))
437
- acumulative += stats_hash['coding_>0.94']
438
- html.concat(single_row('Coding test-code > 0.84', stats_hash['coding_>0.84'] + acumulative, all_seqs))
439
- acumulative += stats_hash['coding_>0.84']
440
- html.concat(single_row('Coding test-code > 0.73', stats_hash['coding_>0.73'] + acumulative, all_seqs))
441
- html << '</table>'
442
- html << '</div>'
443
- write_array_html(html, html_file)
444
- end
445
-
446
- def general_report(html_file, stats_hash, align)
447
- html = []
448
- html << '<div style="margin: 0; float:'+align+'">'
449
- html << table_title('General info')
450
- html.concat(table_header(['', 'Sequences', '%'], 0))
451
- html.concat(single_row('Input', stats_hash['input_seqs'], stats_hash['input_seqs']))
452
- html.concat(single_row('Failing sequences', stats_hash['failed'], stats_hash['output_seqs']))
453
- html.concat(single_row('Artifacts <sup>1</sup>', stats_hash['artifacts'], stats_hash['output_seqs']))
454
- html.concat(single_row('Misassembled', stats_hash['misassembled'], stats_hash['artifacts'], TRUE))
455
- html.concat(single_row('Chimeras', stats_hash['chimeras'], stats_hash['artifacts'], TRUE))
456
- html.concat(single_row('Other', stats_hash['other_artifacts'], stats_hash['artifacts'], TRUE))
457
- html.concat(single_row('Sequences with resolved chimeras', stats_hash['output_seqs'], stats_hash['input_seqs']))
458
- html.concat(single_row('Sequences without artifacts', stats_hash['good_seqs'], stats_hash['output_seqs']))
459
- html.concat(single_row('BA index', "%5.2f" % [stats_hash['BA_index']], nil)) if stats_hash['BA_index'] > 0
460
- html << '</table>'
461
- html << '</div>'
462
- write_array_html(html, html_file)
463
- end
464
-
465
- def taxonomy_graph(html_file, align)
466
- html_file.puts '<div style=\'float:'+align+'\'>'
467
- html_file.puts table_title('Taxonomy distribution on annotations')
468
- html_file.puts '<table >
469
- <tr>
470
- <td>
471
- <canvas id=\'taxonomy\' width=\'540\' height=\'640\'></canvas>
472
- </td>
473
- </tr>
474
- </table>
475
- </div>'
476
-
477
- end
478
-
479
-
480
- def database_report(html_file, stats_hash, align)
481
- html = []
482
- html << '<div style=" margin: 0 float:'+align+'">'
483
- html << table_title('Database usage')
484
- html.concat(table_header(['', 'Unigenes', '%'], 0))
485
- html.concat(single_row('UserDB', stats_hash['userdb'], stats_hash['good_seqs']))
486
- html.concat(single_row('SwissProt', stats_hash['swissprot'], stats_hash['good_seqs']))
487
- html.concat(single_row('TrEMBL', stats_hash['trembl'], stats_hash['good_seqs']))
488
- html.concat(single_row('ncRNA', stats_hash['ncrna'], stats_hash['good_seqs']))
489
- html.concat(single_row('None', stats_hash['coding']+ stats_hash['unknown'], stats_hash['good_seqs']))
490
- html.concat(single_row('Total', stats_hash['good_seqs'], stats_hash['good_seqs']))
491
- html << '</table>'
492
- html << '</div>'
493
- write_array_html(html, html_file)
494
- end
495
-
496
- def assembly_report(html_file, stats_hash, align)
497
- html = []
498
- html << '<div style=" margin: 0; float:'+align+'">'
499
- html << table_title('Report guiding assembly quality')
500
- html.concat(table_header(['', 'Unigenes', '%'], 0))
501
- html.concat(single_row('Unigenes', stats_hash['good_seqs'], stats_hash['good_seqs']))
502
- html.concat(single_row('Unigenes >500pb', stats_hash['sequences_>500'], stats_hash['good_seqs']))
503
- html.concat(single_row('Unigenes >200pb', stats_hash['sequences_>200'], stats_hash['good_seqs']))
504
- html.concat(single_row('Longest unigene', stats_hash['longest_unigene'], nil))
505
- html.concat(single_row('With orthologue <sup>1</sup>', stats_hash['prot_annotated'], stats_hash['good_seqs']))
506
- html.concat(single_row('Different orthologue IDs', stats_hash['different_orthologues'], stats_hash['prot_annotated'], TRUE))
507
- html.concat(single_row('Complete transcripts', stats_hash['complete'], stats_hash['prot_annotated'], TRUE))
508
- html.concat(single_row('Different complete transcripts ', stats_hash['different_completes'], stats_hash['prot_annotated'], TRUE))
509
- html.concat(single_row('ncRNA', stats_hash['ncrna'], stats_hash['good_seqs']))
510
- without_orthologue = stats_hash['coding']+ stats_hash['unknown']
511
- html.concat(single_row('Without orthologue <sup>1</sup>', without_orthologue, stats_hash['good_seqs']))
512
- html.concat(single_row('Coding (all)', stats_hash['coding'], without_orthologue, TRUE))
513
- html.concat(single_row('Coding > 200bp', stats_hash['coding_>200'], without_orthologue, TRUE))
514
- html.concat(single_row('Coding > 500bp', stats_hash['coding_>500'], without_orthologue, TRUE))
515
- html.concat(single_row('Unknown (all)', stats_hash['unknown'], without_orthologue, TRUE))
516
- html.concat(single_row('Unknown > 200bp', stats_hash['unknown_>200'], without_orthologue, TRUE))
517
- html.concat(single_row('Unknown > 500bp', stats_hash['unknown_>500'], without_orthologue, TRUE))
518
- html << '</table>'
519
- html << '<sup>1</sup> Percents for subclassifications of this category <br> were calculated using this line as 100% reference.'
520
- html << '</div>'
521
- write_array_html(html, html_file)
522
- end
523
-
524
- def status_graph(html_file, align)
525
- html_file.puts '<div style=\'float:'+align+'\'>'
526
- html_file.puts table_title('Structural profile')
527
- html_file.puts '<table >
528
- <tr>
529
- <td>
530
- <canvas id=\'profile\' width=\'500\' height=\'440\'></canvas>
531
- </td>
532
- </tr>
533
- </table>
534
- </div>'
535
-
536
- end
537
-
538
- def status_report(html_file, stats_hash, align)
539
- html = []
540
- html << '<div style=" margin: 0; float:'+align+'">'
541
- html << table_title('Status report')
542
- html.concat(table_header(['Status', 'Unigenes', '%'], 2))
543
- html.concat(fused_row('Complete', stats_hash['complete_sure'], stats_hash['complete_putative'], stats_hash['good_seqs']))
544
- html.concat(fused_row('C-terminus', stats_hash['c_terminal_sure'], stats_hash['c_terminal_putative'], stats_hash['good_seqs']))
545
- html.concat(fused_row('N-terminus', stats_hash['n_terminal_sure'], stats_hash['n_terminal_putative'], stats_hash['good_seqs']))
546
- html.concat(composed_single_row('Internal', stats_hash['internal'], stats_hash['good_seqs']))
547
- html.concat(fused_row('Coding', stats_hash['coding_sure'], stats_hash['coding_putative'], stats_hash['good_seqs']))
548
- html.concat(composed_single_row('ncRNA', stats_hash['ncrna'], stats_hash['good_seqs']))
549
- html.concat(composed_single_row('Unknown', stats_hash['unknown'], stats_hash['good_seqs']))
550
- html.concat(composed_single_row('Total', stats_hash['good_seqs'], stats_hash['good_seqs']))
551
- html << '</table>'
552
- html << '</div>'
553
- write_array_html(html, html_file)
554
- end
555
-
556
-
557
- def table_title(title)
558
- html = '<div style="font-size:25px; margin: 10"><b>'+title+'</b></div>'
559
- return html
560
- end
561
-
562
- def table_header(col_array, colspan)
563
- html = []
564
-
565
- html << '<table border="2" cellspacing="0" cellpadding="2">'
566
- # Table header
567
- html << '<tr>'
568
- col_array.each_with_index do |col,i|
569
- if i == 0 && colspan > 0
570
- html << '<th colspan="'+colspan.to_s+'">'+col+'</th>'
571
- else
572
- html << '<th>'+col+'</th>'
573
- end
574
- end
575
- html << '</tr>'
576
- return html
577
- end
578
-
579
- def single_row(name, magnitude, total, space = FALSE)
580
- if space
581
- name = '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'+ name
582
- end
583
- html = []
584
- html << '<tr>'
585
- html << '<td align="left">'+name+'</td>'
586
- html.concat(sub_row(magnitude, total))
587
- html << '</tr>'
588
- return html
589
- end
590
-
591
-
592
- def fused_row(type, sure_magnitude, putative_magnitude, total)
593
- html = []
594
- html << '<td rowspan="2" align="left">'+type+'</td>'
595
- html << seq_status('Sure')
596
- html.concat(sub_row(sure_magnitude, total))
597
- html << '</tr>'
598
- html << '<tr>'
599
- html << seq_status('Putative')
600
- html.concat(sub_row(putative_magnitude, total))
601
- html << '</tr>'
602
- return html
603
- end
604
-
605
- def seq_status(status)
606
- html = '<td align="left">'+status+'</td>'
607
- return html
608
- end
609
-
610
- def sub_row(magnitude, total)
611
- if !total.nil?
612
- perc_float = magnitude*100.0/total
613
- if !perc_float.nan?
614
- percentage = '%.2f' % perc_float.to_s
615
- percentage += '%'
616
- else
617
- percentage ='-'
618
- end
619
- else
620
- percentage = '-'
621
- end
622
- html = []
623
- html << '<td align="right">'+magnitude.to_s+'</td>'
624
- html << '<td align="right">'+percentage+'</td>'
625
- return html
626
- end
627
-
628
- def composed_single_row(type, magnitude, total)
629
- html = []
630
- html << '<tr>'
631
- html << '<td colspan="2" align="left">'+type+'</td>'
632
- html.concat(sub_row(magnitude, total))
633
- html << '</tr>'
634
- return html
635
- end
636
-
637
-
638
- def write_array_html(html, html_file)
639
- html.map{|line| html_file.puts line}
640
- end
641
- end