full_lengther_next 0.6.2 → 0.9.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +5 -0
  5. data/CODE_OF_CONDUCT.md +49 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +21 -0
  8. data/{README.rdoc → README.md} +0 -0
  9. data/Rakefile +6 -37
  10. data/bin/console +14 -0
  11. data/bin/download_fln_dbs.rb +2 -7
  12. data/bin/full_lengther_next +85 -6
  13. data/bin/make_user_db.rb +13 -5
  14. data/bin/setup +8 -0
  15. data/full_lengther_next.gemspec +42 -0
  16. data/lib/full_lengther_next.rb +2 -10
  17. data/lib/full_lengther_next/artifacts.rb +74 -0
  18. data/lib/full_lengther_next/{classes/blast_functions.rb → blast_functions.rb} +0 -0
  19. data/lib/full_lengther_next/{classes/cdhit.rb → cdhit.rb} +0 -0
  20. data/lib/full_lengther_next/{classes/chimeric_seqs.rb → chimeric_seqs.rb} +0 -0
  21. data/lib/full_lengther_next/{classes/common_functions.rb → common_functions.rb} +0 -0
  22. data/lib/full_lengther_next/{classes/exonerate_result.rb → exonerate_result.rb} +0 -0
  23. data/lib/full_lengther_next/{classes/fl_analysis.rb → fl_analysis.rb} +0 -0
  24. data/lib/full_lengther_next/{classes/fl_string_utils.rb → fl_string_utils.rb} +0 -0
  25. data/lib/full_lengther_next/fln_stats.rb +613 -0
  26. data/lib/full_lengther_next/go_methods.rb +42 -0
  27. data/lib/full_lengther_next/{classes/handle_db.rb → handle_db.rb} +0 -0
  28. data/lib/full_lengther_next/mapping.rb +296 -0
  29. data/lib/full_lengther_next/{classes/my_worker.rb → my_worker.rb} +71 -9
  30. data/lib/full_lengther_next/{classes/my_worker_EST.rb → my_worker_EST.rb} +0 -0
  31. data/lib/full_lengther_next/{classes/my_worker_manager_EST.rb → my_worker_manager_EST.rb} +0 -0
  32. data/lib/full_lengther_next/{classes/my_worker_manager_fln.rb → my_worker_manager_fln.rb} +181 -16
  33. data/lib/full_lengther_next/{classes/nc_rna.rb → nc_rna.rb} +0 -0
  34. data/lib/full_lengther_next/{classes/orf.rb → orf.rb} +0 -0
  35. data/lib/full_lengther_next/{classes/reptrans.rb → reptrans.rb} +9 -5
  36. data/lib/full_lengther_next/{classes/sequence.rb → sequence.rb} +26 -1
  37. data/lib/full_lengther_next/{classes/test_code.rb → test_code.rb} +1 -1
  38. data/lib/full_lengther_next/{classes/types.rb → types.rb} +3 -2
  39. data/lib/full_lengther_next/{classes/une_los_hit.rb → une_los_hit.rb} +0 -0
  40. data/lib/full_lengther_next/version.rb +3 -0
  41. data/lib/full_lengther_next/{classes/warnings.rb → warnings.rb} +0 -0
  42. data/report_templates/general_summary.erb +140 -0
  43. data/report_templates/mapping_summary.erb +98 -0
  44. data/report_templates/reptrans_summary.erb +32 -0
  45. metadata +112 -134
  46. data/.gemtest +0 -0
  47. data/History.txt +0 -32
  48. data/Manifest.txt +0 -44
  49. data/PostInstall.txt +0 -6
  50. data/bin/plot_fln.rb +0 -270
  51. data/bin/plot_taxonomy.rb +0 -70
  52. data/lib/expresscanvas.zip +0 -0
  53. data/lib/full_lengther_next/classes/artifacts.rb +0 -66
  54. data/lib/full_lengther_next/classes/fln_stats.rb +0 -641
  55. data/script/console +0 -10
  56. data/script/destroy +0 -14
  57. data/script/generate +0 -14
  58. data/test/test_full_lengther_next.rb +0 -11
  59. data/test/test_helper.rb +0 -3
@@ -1,641 +0,0 @@
1
- require 'types.rb'
2
-
3
- module FlnStats
4
# Builds the counter table used for the main summary report.
# Every counter starts at 0 and keys are inserted in a fixed order, which is
# the order in which write_txt later emits them.
def initialize_stats_hash
  counter_names = %w[
    input_seqs output_seqs failed sequences_>200 sequences_>500 longest_unigene
    good_seqs artifacts misassembled chimeras other_artifacts
    unknown unknown_>200 unknown_>500
    prot_annotated complete complete_sure complete_putative
    n_terminal n_terminal_sure n_terminal_putative
    c_terminal c_terminal_sure c_terminal_putative
    internal swissprot trembl userdb ncrna
    coding coding_sure coding_putative coding_>200 coding_>500
    different_orthologues different_completes BA_index
  ]
  counter_names.each_with_object({}) { |counter, table| table[counter] = 0 }
end
46
-
47
# Extracts an organism name (its first two words) from a hit description and
# increments that organism's counter in +taxonomy+.
#
# name     - String description of a database hit. It is not modified.
# taxonomy - Hash of organism name => occurrences; updated in place.
#
# Three description layouts are handled:
# * descriptions containing "OS=<organism> GN=...";
# * descriptions starting with "sp=" or "tr=", where the organism is the two
#   words preceding one or two parenthesised groups;
# * anything else: the text after the first ';' up to the first '.'.
#
# Nothing is counted when no organism can be extracted.
def get_taxonomy(name, taxonomy)
  organism = nil
  if name.include?('OS=')
    organism = name.split('OS=', 2).last.split(' GN=').first.strip
  elsif name[0..2] == 'sp=' || name[0..2] == 'tr='
    # This condition was previously written with '=' (assignment): it
    # overwrote the first three characters of the caller's string, was always
    # truthy and made the generic branch below unreachable.
    found = name.match(/(\w+ \w+) \(([\w ]+)\) \(([\w ]+)\)/) ||
            name.match(/(\w+ \w+) \(([\w ]+)\)/)
    organism = found[1] if found
  else
    organism = name.split(';', 2).last.to_s
    organism = organism.split('.', 2).first.to_s
    organism = organism.gsub(/\(\D+\)/, '')
    if organism.split(' ').length > 1
      # NOTE(review): the diff view shows gsub!(' ','') here, which would
      # delete every space and defeat the two-word truncation below; runs of
      # spaces are collapsed instead. Confirm against the original file.
      organism = organism.delete('.').gsub(/^ /, '').squeeze(' ').strip
    end
  end
  return if organism.nil?

  organism = organism.split(' ')[0..1].join(' ')
  return if organism.empty?

  taxonomy[organism] = (taxonomy[organism] || 0) + 1
end
82
-
83
# Builds the zeroed counter table used by the representative-transcriptome
# report. Key order is fixed.
def initialize_stats_hash_reptrans
  counters = {}
  %w[prot_annotated est_annotated coding_>1 coding_>0.94 coding_>0.84 coding_>0.73 coding_>0].each do |counter|
    counters[counter] = 0
  end
  counters
end
94
-
95
# Accumulates the statistics of one batch of sequences into +stats_hash+.
#
# seqs                    - Array of sequence objects responding to type,
#                           fasta_length, status, db_name, seq_name and hit.
# stats_hash              - Hash of counters (see initialize_stats_hash);
#                           updated in place.
# diff_ids_array          - Array collecting the distinct hit accessions of
#                           sequences whose type lies between UNKNOWN and NCRNA.
# diff_ids_complete_array - Array collecting the distinct hit accessions of
#                           COMPLETE sequences.
#
# The type constants (FAILED, UNKNOWN, COMPLETE, ...) are defined outside this
# method. An empty batch is accepted and leaves 'longest_unigene' untouched.
#
# Returns [stats_hash, diff_ids_array, diff_ids_complete_array].
def summary_stats(seqs, stats_hash, diff_ids_array, diff_ids_complete_array)
  low_limit = 200
  upper_limit = 500
  longer_than = lambda { |list, limit| list.count { |s| s.fasta_length > limit } }
  sure = lambda { |list| list.count { |s| s.status } }
  putative = lambda { |list| list.count { |s| !s.status } }
  of_type = lambda { |type| seqs.select { |s| s.type == type } }

  # All seqs
  stats_hash['output_seqs'] += seqs.length
  good_seqs = seqs.select { |s| s.type >= UNKNOWN }
  stats_hash['good_seqs'] += good_seqs.length

  # Longest unigene. max is nil for an empty batch, so guard the comparison.
  current_longest_unigene = seqs.map { |s| s.fasta_length }.max
  if !current_longest_unigene.nil? && current_longest_unigene > stats_hash['longest_unigene']
    stats_hash['longest_unigene'] = current_longest_unigene
  end

  # Load ids
  seqs.each do |s|
    diff_ids_array << s.hit.acc if s.type > UNKNOWN && s.type < NCRNA
  end
  diff_ids_array.uniq!

  # By length
  stats_hash['sequences_>200'] += longer_than.call(good_seqs, low_limit)
  stats_hash['sequences_>500'] += longer_than.call(good_seqs, upper_limit)

  stats_hash['failed'] += of_type.call(FAILED).length

  # Unknown
  all_unknown = of_type.call(UNKNOWN)
  stats_hash['unknown'] += all_unknown.length
  stats_hash['unknown_>200'] += longer_than.call(all_unknown, low_limit)
  stats_hash['unknown_>500'] += longer_than.call(all_unknown, upper_limit)

  # Artifacts
  stats_hash['artifacts'] += seqs.count { |s| s.type < UNKNOWN && s.type > FAILED }
  stats_hash['misassembled'] += of_type.call(MISASSEMBLED).length
  # Pieces of an already split chimera carry '_split_' in their name and are
  # skipped so that a multiple chimera is counted once.
  stats_hash['chimeras'] += seqs.count { |s| s.type == CHIMERA && !s.seq_name.include?('_split_') }
  stats_hash['other_artifacts'] += of_type.call(OTHER).length

  # Annotated with protein
  prot_annotated = seqs.select { |s| s.type >= COMPLETE && s.type <= INTERNAL }
  stats_hash['prot_annotated'] += prot_annotated.length

  # By annotation
  stats_hash['internal'] += of_type.call(INTERNAL).length
  complete = of_type.call(COMPLETE)
  n_terminal = of_type.call(N_TERMINAL)
  c_terminal = of_type.call(C_TERMINAL)

  stats_hash['complete'] += complete.length
  stats_hash['n_terminal'] += n_terminal.length
  stats_hash['c_terminal'] += c_terminal.length

  # Load complete ids
  complete.each { |s| diff_ids_complete_array << s.hit.acc }
  diff_ids_complete_array.uniq!

  # By status
  stats_hash['complete_sure'] += sure.call(complete)
  stats_hash['n_terminal_sure'] += sure.call(n_terminal)
  stats_hash['c_terminal_sure'] += sure.call(c_terminal)
  stats_hash['complete_putative'] += putative.call(complete)
  stats_hash['n_terminal_putative'] += putative.call(n_terminal)
  stats_hash['c_terminal_putative'] += putative.call(c_terminal)

  # By database: anything that is neither sp_* nor tr_* counts as user db.
  swissprot = prot_annotated.count { |s| s.db_name =~ /^sp_/ }
  trembl = prot_annotated.count { |s| s.db_name =~ /^tr_/ }
  stats_hash['swissprot'] += swissprot
  stats_hash['trembl'] += trembl
  stats_hash['userdb'] += prot_annotated.length - swissprot - trembl

  # ncRNA
  stats_hash['ncrna'] += of_type.call(NCRNA).length

  # Coding sequences
  coding = of_type.call(CODING)
  stats_hash['coding'] += coding.length
  stats_hash['coding_sure'] += sure.call(coding)
  stats_hash['coding_putative'] += putative.call(coding)
  stats_hash['coding_>200'] += longer_than.call(coding, low_limit)
  stats_hash['coding_>500'] += longer_than.call(coding, upper_limit)

  return stats_hash, diff_ids_array, diff_ids_complete_array
end
193
-
194
# Fills in the figures that can only be computed once every batch has been
# accumulated: the number of distinct orthologue ids, the number of distinct
# complete ids and, when all its inputs are positive, the 'BA_index'.
#
# Returns the same stats_hash it was given.
def last_stats(stats_hash, diff_ids_array, diff_ids_complete_array)
  stats_hash['different_orthologues'] = diff_ids_array.length
  stats_hash['different_completes'] = diff_ids_complete_array.length

  # BA index is only computed when none of its terms is zero.
  needed = ['prot_annotated', 'complete', 'sequences_>500', 'different_orthologues', 'different_completes']
  return stats_hash unless needed.all? { |key| stats_hash[key] > 0 }

  annotated = stats_hash['prot_annotated']
  completes = stats_hash['complete']
  geometric_coef = (annotated * completes * 1.0) / (stats_hash['sequences_>500'] * 10000)
  improvement_coef = (stats_hash['different_orthologues'] * 1.0 + stats_hash['different_completes']) / (annotated + completes)
  stats_hash['BA_index'] = Math.sqrt(geometric_coef * improvement_coef)
  stats_hash
end
210
-
211
# Increments the counter of the test-code score bin that +coding_seq+ falls
# in. Bins are checked from the highest threshold down; a score that is not
# greater than 0 is not counted.
def coding_stats_reptrans(coding_seq, stats_hash)
  score_bins = [
    [1, 'coding_>1'],
    [0.95, 'coding_>0.94'],
    [0.85, 'coding_>0.84'],
    [0.73, 'coding_>0.73'],
    [0, 'coding_>0']
  ]
  _threshold, group = score_bins.find { |threshold, _label| coding_seq.t_code > threshold }
  stats_hash[group] += 1 unless group.nil?
end
228
-
229
# Finalises the accumulated statistics with last_stats and writes them out,
# first as text to +txt_file+ and then as HTML to +html_file+.
def write_summary_stats(stats_hash, stats_taxonomy, diff_ids_array, diff_ids_complete_array, txt_file, html_file)
  final_stats = last_stats(stats_hash, diff_ids_array, diff_ids_complete_array)
  write_txt(final_stats, txt_file)
  write_html(final_stats, html_file, stats_taxonomy)
end
234
-
235
# Writes the representative-transcriptome statistics to disk.
#
# stats_hash - Hash of counters (see initialize_stats_hash_reptrans).
# html_file  - Path of the HTML report to create.
# txt_file   - Path of the text report to create.
#
# Both files are opened in block form so they are flushed and closed even if
# one of the writers raises; previously the handles were never closed.
def write_reptrans_stats(stats_hash, html_file, txt_file)
  File.open(html_file, 'w') do |html|
    File.open(txt_file, 'w') do |txt|
      write_txt(stats_hash, txt)
      write_html_reptrans(stats_hash, html)
    end
  end
end
241
-
242
# Emits the complete representative-transcriptome HTML document to the
# already opened +html_file+: opening tag, head, body, closing tag.
def write_html_reptrans(stats_hash, html_file)
  html_file.puts('<html>')
  header(html_file)
  body_reptrans(html_file, stats_hash)
  html_file.puts('</html>')
end
248
-
249
# Dumps every counter to +file+, one per line, as "<value>TAB<key>", in the
# hash's own iteration order.
def write_txt(stats_hash, file)
  stats_hash.each_pair { |label, count| file.puts "#{count}\t#{label}" }
end
254
-
255
# Writes the main HTML summary to the already opened +html_file+.
#
# stats_hash     - Hash of finalised counters.
# html_file      - Open File for the report; its directory also receives the
#                  'expresscanvas' assets the page loads.
# stats_taxonomy - Hash of organism => count for the taxonomy chart.
#
# When the report directory has no 'expresscanvas' folder, the bundled
# expresscanvas.zip (two directories above this file) is unpacked there with
# the external `unzip` command.
def write_html(stats_hash, html_file, stats_taxonomy)
  js_path = File.dirname(html_file.to_path)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  unless File.exist?(File.join(js_path, 'expresscanvas'))
    zip_path = File.join(File.dirname(__FILE__), '..', '..', 'expresscanvas.zip')
    # Argument-list form: no shell is involved, so paths containing spaces or
    # shell metacharacters are passed through intact.
    system('unzip', '-qq', zip_path, '-d', js_path)
  end
  html_file.puts '<html>'
  html_header(html_file, stats_hash, stats_taxonomy)
  body(html_file, stats_hash)
  html_file.puts '</html>'
end
263
-
264
# Writes the minimal <head> section (title only) used by the
# representative-transcriptome report.
def header(html_file)
  head_lines = ['<head>', '<title>FLN Summary</title>', '</head>']
  html_file.puts(head_lines)
end
269
-
270
# Writes the <head> of the main summary page, including the script that
# draws the two CanvasXpress charts ('profile' and 'taxonomy').
#
# html_file      - IO the markup is written to.
# stats_hash     - Hash of finalised counters.
# stats_taxonomy - Hash of organism => count; the 21 most frequent are charted.
#
# Changes from the previous version:
# * percentages are 0.0 instead of NaN/Infinity when 'good_seqs' is zero;
# * backslashes and single quotes in organism names are escaped, because the
#   names are embedded in single-quoted JavaScript string literals.
#
# NOTE(review): whitespace inside the emitted markup was not recoverable from
# the diff view; only the line contents are reproduced.
# NOTE(review): the "Sure" series uses the total 'coding' counter rather than
# 'coding_sure', as in the original; confirm this is intended.
def html_header(html_file, stats_hash, stats_taxonomy)
  sure_keys = %w[unknown complete_sure n_terminal_sure c_terminal_sure internal ncrna coding]
  # nil marks a category with no putative counterpart; it is charted as 0.
  putative_keys = [nil, 'complete_putative', 'n_terminal_putative', 'c_terminal_putative', nil, nil, 'coding_putative']
  structural_data_sure = sure_keys.map { |key| stats_hash[key] }
  structural_data_putative = putative_keys.map { |key| key.nil? ? 0 : stats_hash[key] }

  total = stats_hash['good_seqs']
  percent = lambda { |count| total.to_f.zero? ? 0.0 : count * 100.0 / total }
  values_structural_sure = "[#{structural_data_sure.map(&percent).join(', ')}]"
  values_structural_putative = "[#{structural_data_putative.map(&percent).join(', ')}]"

  escape_js = lambda { |label| label.to_s.gsub(/[\\']/) { |ch| '\\' + ch } }
  data = stats_taxonomy.to_a.sort { |s2, s1| s1.last <=> s2.last }[0..20]
  smps_taxonomy = "['#{data.map { |tax| escape_js.call(tax.first) }.join("', '")}']"
  values_taxonomy = "[#{data.map { |tax| tax.last }.join(', ')}]"

  html_file.puts '<head>
<title>FLN Summary</title>
<meta http-equiv="CACHE-CONTROL" CONTENT="NO-CACHE">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />

<!--[if lt IE 9]><script type="text/javascript" src="./expresscanvas/js/flashcanvas.js"></script><![endif]-->
<script type="text/javascript" src="./expresscanvas/js/canvasXpress.min.js"></script>

<script id=\'demoScript\'>
var showDemo = function () {'

  html_file.puts "new CanvasXpress('profile',
{
'y' : {
'vars' : ['Sure', 'Putative'],
'smps' : ['Unknown', 'Complete', 'N-terminal', 'C-terminal', 'Internal', 'ncrna', 'Coding'],
'data' : [#{values_structural_sure},
#{values_structural_putative}],
},
'a' : {
'xAxis' : ['Sure', 'Putative']
},
},

{'gradient': false,
'toolbarPermanent': true,
'graphOrientation': 'vertical',
'graphType': 'Stacked',
'legendBackgroundColor': false,
'smpLabelScaleFontFactor': 0.8,
'xAxisTitle': '% sequences',
'xAxis2Show': false,
'xAxisExact': true,
'setMaxX': 80,
'setMinX': 0,
'axisTitleScaleFontFactor': 2,
'smpTitleFontStyle': 'italic',
'titleHeight': 60
}
);

new CanvasXpress('taxonomy',
{
'y' : {
'vars' : ['Annotations'],
'smps' : #{smps_taxonomy},
'data' : [#{values_taxonomy}],
},
'a' : {
'xAxis' : ['Sure', 'Putative']
},
},

{'gradient': false,
'toolbarPermanent': true,
'graphOrientation': 'horizontal',
'showLegend': false,
'smpLabelScaleFontFactor': 1.5,
'xAxisTitle': 'Number of sequences',
'xAxis2Show': false,
'titleHeight': 60
}
);
}
</script>
</head>"
end
366
-
367
# Writes the <body> of the representative-transcriptome report: a title and
# two tables floated side by side.
#
# The second table used to be floated with the misspelt value 'rigth', which
# is not valid CSS and was ignored by browsers; it is now 'right'.
def body_reptrans(html_file, stats_hash)
  html_file.puts '<body bgcolor="#FFFFFF" >', '<center>' # Start body
  html_file.puts '<div style="float:center; font-size:30; margin:10px"><b>', 'Full-LengtherNEXT Representative Transcriptome Summary', '</b></div>'
  # TABLES
  html_file.puts '<div style=" width: 850px; height: 350px; padding: 10 ">'
  reptrans_report(html_file, stats_hash, 'left')
  reptrans_acumulative_report(html_file, stats_hash, 'right')
  html_file.puts '</div>'
  # END TABLES
  html_file.puts '</center>', '</body>' # End body
end
378
-
379
# Writes the <body> of the main summary page: a title followed by three rows,
# each holding a left-floated and a right-floated panel.
#
# Changes from the previous version:
# * the misspelt float value 'rigth' (invalid CSS, ignored by browsers) is
#   now 'right' for the status and database panels;
# * 'id=demo' was inside the onload attribute value, where it ran as a
#   JavaScript statement after showDemo(); it is now a separate id attribute.
def body(html_file, stats_hash)
  row_style = '<div style="overflow: hidden; width: 950px; height: 550px; padding: 10 ">'
  html_file.puts '<body bgcolor="#FFFFFF" onload="showDemo();" id="demo">', '<center>' # Start body
  html_file.puts '<div style="float:center; font-size:30; margin:10px"><b>', 'Full-LengtherNEXT Summary', '</b></div>'

  # TABLES
  html_file.puts row_style
  general_report(html_file, stats_hash, 'left')
  assembly_report(html_file, stats_hash, 'right')
  html_file.puts '</div>'
  html_file.puts row_style
  status_graph(html_file, 'left')
  status_report(html_file, stats_hash, 'right')
  html_file.puts '</div>'
  # The taxonomy row is taller than the other two.
  html_file.puts '<div style="overflow: hidden; width: 950px; height: 750px; padding: 10 ">'
  taxonomy_graph(html_file, 'left')
  database_report(html_file, stats_hash, 'right')
  html_file.puts '</div>'
  # END TABLES
  html_file.puts '</center>', '</body>' # End body
end
399
-
400
-
401
-
402
# Writes the "Sequences info" table of the representative-transcriptome
# report. Percentages are relative to the sum of all counters in stats_hash.
def reptrans_report(html_file, stats_hash, align)
  all_seqs = stats_hash.values.inject(0) { |acc, count| acc + count }
  rows = [
    ['Output', all_seqs],
    ['Annotated with protein', stats_hash['prot_annotated']],
    ['Annotated with EST', stats_hash['est_annotated']],
    ['Coding test-code > 1', stats_hash['coding_>1']],
    ['Coding test-code > 0.94', stats_hash['coding_>0.94']],
    ['Coding test-code > 0.84', stats_hash['coding_>0.84']],
    ['Coding test-code > 0.73', stats_hash['coding_>0.73']],
    ['Coding test-code > 0', stats_hash['coding_>0']]
  ]

  html = ['<div style=" margin: 0; float:' + align + '">', table_title('Sequences info')]
  html.concat(table_header(['', 'Sequences', '%'], 0))
  rows.each { |label, count| html.concat(single_row(label, count, all_seqs)) }
  html << '</table>' << '</div>'
  write_array_html(html, html_file)
end
421
-
422
# Writes the cumulative "Sequences summary" table: each row shows its own
# counter plus all the rows above it. Percentages are relative to the sum of
# all counters in stats_hash. The 'coding_>0' bin has no row in this table.
def reptrans_acumulative_report(html_file, stats_hash, align)
  all_seqs = stats_hash.values.inject(0) { |acc, count| acc + count }
  categories = [
    ['Annotated with protein', 'prot_annotated'],
    ['Annotated with EST', 'est_annotated'],
    ['Coding test-code > 1', 'coding_>1'],
    ['Coding test-code > 0.94', 'coding_>0.94'],
    ['Coding test-code > 0.84', 'coding_>0.84'],
    ['Coding test-code > 0.73', 'coding_>0.73']
  ]

  html = ['<div style=" margin: 0; float:' + align + '">', table_title('Sequences summary (Acumulative)')]
  html.concat(table_header(['', 'Sequences', '%'], 0))
  running_total = 0
  categories.each do |label, key|
    running_total += stats_hash[key]
    html.concat(single_row(label, running_total, all_seqs))
  end
  html << '</table>' << '</div>'
  write_array_html(html, html_file)
end
445
-
446
# Writes the "General info" table: input, failing and artifact counts, the
# artifact breakdown (indented rows, percentages relative to 'artifacts') and,
# when it is positive, the BA index.
#
# The indented rows used the legacy TRUE constant, which is not defined in
# modern Ruby; the literal true is used instead.
def general_report(html_file, stats_hash, align)
  html = []
  html << '<div style="margin: 0; float:'+align+'">'
  html << table_title('General info')
  html.concat(table_header(['', 'Sequences', '%'], 0))
  html.concat(single_row('Input', stats_hash['input_seqs'], stats_hash['input_seqs']))
  html.concat(single_row('Failing sequences', stats_hash['failed'], stats_hash['output_seqs']))
  html.concat(single_row('Artifacts <sup>1</sup>', stats_hash['artifacts'], stats_hash['output_seqs']))
  html.concat(single_row('Misassembled', stats_hash['misassembled'], stats_hash['artifacts'], true))
  html.concat(single_row('Chimeras', stats_hash['chimeras'], stats_hash['artifacts'], true))
  html.concat(single_row('Other', stats_hash['other_artifacts'], stats_hash['artifacts'], true))
  html.concat(single_row('Sequences with resolved chimeras', stats_hash['output_seqs'], stats_hash['input_seqs']))
  html.concat(single_row('Sequences without artifacts', stats_hash['good_seqs'], stats_hash['output_seqs']))
  # The BA index is a score, not a count, so it gets no percentage (total nil).
  html.concat(single_row('BA index', "%5.2f" % [stats_hash['BA_index']], nil)) if stats_hash['BA_index'] > 0
  html << '</table>'
  html << '</div>'
  write_array_html(html, html_file)
end
464
-
465
# Writes the panel holding the <canvas id='taxonomy'> element that the
# script emitted by html_header draws the taxonomy chart into.
def taxonomy_graph(html_file, align)
  html_file.puts "<div style='float:" + align + "'>"
  html_file.puts table_title('Taxonomy distribution on annotations')
  html_file.puts([
    '<table >',
    '<tr>',
    '<td>',
    "<canvas id='taxonomy' width='540' height='640'></canvas>",
    '</td>',
    '</tr>',
    '</table>',
    '</div>'
  ])
end
478
-
479
-
480
# Writes the "Database usage" table: how many of the artifact-free sequences
# were annotated from each database. Percentages are relative to 'good_seqs'.
#
# The wrapper's style attribute lacked the ';' between 'margin: 0' and
# 'float:', which made the declaration invalid; the separator is added so the
# panel floats like the other report tables.
def database_report(html_file, stats_hash, align)
  html = []
  html << '<div style=" margin: 0; float:'+align+'">'
  html << table_title('Database usage')
  html.concat(table_header(['', 'Unigenes', '%'], 0))
  html.concat(single_row('UserDB', stats_hash['userdb'], stats_hash['good_seqs']))
  html.concat(single_row('SwissProt', stats_hash['swissprot'], stats_hash['good_seqs']))
  html.concat(single_row('TrEMBL', stats_hash['trembl'], stats_hash['good_seqs']))
  html.concat(single_row('ncRNA', stats_hash['ncrna'], stats_hash['good_seqs']))
  # Coding and unknown sequences have no database hit.
  html.concat(single_row('None', stats_hash['coding']+ stats_hash['unknown'], stats_hash['good_seqs']))
  html.concat(single_row('Total', stats_hash['good_seqs'], stats_hash['good_seqs']))
  html << '</table>'
  html << '</div>'
  write_array_html(html, html_file)
end
495
-
496
# Writes the "Report guiding assembly quality" table. Top-level rows are
# percentages of 'good_seqs'; indented rows are percentages of their parent
# category (the rows marked with a superscript 1).
#
# The indented rows used the legacy TRUE constant, which is not defined in
# modern Ruby; the literal true is used instead.
def assembly_report(html_file, stats_hash, align)
  html = []
  html << '<div style=" margin: 0; float:'+align+'">'
  html << table_title('Report guiding assembly quality')
  html.concat(table_header(['', 'Unigenes', '%'], 0))
  html.concat(single_row('Unigenes', stats_hash['good_seqs'], stats_hash['good_seqs']))
  html.concat(single_row('Unigenes >500pb', stats_hash['sequences_>500'], stats_hash['good_seqs']))
  html.concat(single_row('Unigenes >200pb', stats_hash['sequences_>200'], stats_hash['good_seqs']))
  # A length, not a count: no percentage column (total nil).
  html.concat(single_row('Longest unigene', stats_hash['longest_unigene'], nil))
  html.concat(single_row('With orthologue <sup>1</sup>', stats_hash['prot_annotated'], stats_hash['good_seqs']))
  html.concat(single_row('Different orthologue IDs', stats_hash['different_orthologues'], stats_hash['prot_annotated'], true))
  html.concat(single_row('Complete transcripts', stats_hash['complete'], stats_hash['prot_annotated'], true))
  html.concat(single_row('Different complete transcripts ', stats_hash['different_completes'], stats_hash['prot_annotated'], true))
  html.concat(single_row('ncRNA', stats_hash['ncrna'], stats_hash['good_seqs']))
  without_orthologue = stats_hash['coding']+ stats_hash['unknown']
  html.concat(single_row('Without orthologue <sup>1</sup>', without_orthologue, stats_hash['good_seqs']))
  html.concat(single_row('Coding (all)', stats_hash['coding'], without_orthologue, true))
  html.concat(single_row('Coding > 200bp', stats_hash['coding_>200'], without_orthologue, true))
  html.concat(single_row('Coding > 500bp', stats_hash['coding_>500'], without_orthologue, true))
  html.concat(single_row('Unknown (all)', stats_hash['unknown'], without_orthologue, true))
  html.concat(single_row('Unknown > 200bp', stats_hash['unknown_>200'], without_orthologue, true))
  html.concat(single_row('Unknown > 500bp', stats_hash['unknown_>500'], without_orthologue, true))
  html << '</table>'
  html << '<sup>1</sup> Percents for subclassifications of this category <br> were calculated using this line as 100% reference.'
  html << '</div>'
  write_array_html(html, html_file)
end
523
-
524
# Writes the panel holding the <canvas id='profile'> element that the script
# emitted by html_header draws the structural-profile chart into.
def status_graph(html_file, align)
  html_file.puts "<div style='float:" + align + "'>"
  html_file.puts table_title('Structural profile')
  html_file.puts([
    '<table >',
    '<tr>',
    '<td>',
    "<canvas id='profile' width='500' height='440'></canvas>",
    '</td>',
    '</tr>',
    '</table>',
    '</div>'
  ])
end
537
-
538
# Writes the "Status report" table. Categories that have a sure/putative
# split take two rows (fused_row); the rest take one row spanning both
# leading columns (composed_single_row). Percentages are relative to
# 'good_seqs'.
def status_report(html_file, stats_hash, align)
  total = stats_hash['good_seqs']
  layout = [
    [:fused, 'Complete', 'complete'],
    [:fused, 'C-terminus', 'c_terminal'],
    [:fused, 'N-terminus', 'n_terminal'],
    [:single, 'Internal', 'internal'],
    [:fused, 'Coding', 'coding'],
    [:single, 'ncRNA', 'ncrna'],
    [:single, 'Unknown', 'unknown'],
    [:single, 'Total', 'good_seqs']
  ]

  html = ['<div style=" margin: 0; float:' + align + '">', table_title('Status report')]
  html.concat(table_header(['Status', 'Unigenes', '%'], 2))
  layout.each do |kind, label, key|
    if kind == :fused
      html.concat(fused_row(label, stats_hash[key + '_sure'], stats_hash[key + '_putative'], total))
    else
      html.concat(composed_single_row(label, stats_hash[key], total))
    end
  end
  html << '</table>' << '</div>'
  write_array_html(html, html_file)
end
555
-
556
-
557
# Returns the markup for a panel heading containing +title+ in bold.
def table_title(title)
  '<div style="font-size:25px; margin: 10"><b>' + title + '</b></div>'
end
561
-
562
# Returns the opening lines of a report table: the <table> tag and a header
# row with one <th> per entry of +col_array+. When +colspan+ is positive the
# first header cell spans that many columns.
def table_header(col_array, colspan)
  header_cells = col_array.each_with_index.map do |title, position|
    if position == 0 && colspan > 0
      '<th colspan="' + colspan.to_s + '">' + title + '</th>'
    else
      '<th>' + title + '</th>'
    end
  end
  ['<table border="2" cellspacing="0" cellpadding="2">', '<tr>'] + header_cells + ['</tr>']
end
578
-
579
# Returns the lines of one table row: a label cell followed by the magnitude
# and percentage cells produced by sub_row.
#
# name      - String label of the row (may contain markup).
# magnitude - Value shown in the second column.
# total     - Reference for the percentage, or nil for no percentage.
# space     - When truthy the label is indented with non-breaking spaces to
#             mark the row as a sub-category. Defaults to false.
#
# The default used the legacy FALSE constant, which is not defined in modern
# Ruby, so every call that omitted +space+ raised NameError there.
def single_row(name, magnitude, total, space = false)
  label = space ? '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + name : name
  html = []
  html << '<tr>'
  html << '<td align="left">' + label + '</td>'
  html.concat(sub_row(magnitude, total))
  html << '</tr>'
  html
end
590
-
591
-
592
# Returns the lines of a two-row group for a category split into "Sure" and
# "Putative": the category cell spans both rows, and each row carries its
# status label plus the cells produced by sub_row.
#
# The first row was closed with </tr> but never opened; the missing <tr> is
# now emitted so the markup is balanced.
def fused_row(type, sure_magnitude, putative_magnitude, total)
  html = []
  html << '<tr>'
  html << '<td rowspan="2" align="left">'+type+'</td>'
  html << seq_status('Sure')
  html.concat(sub_row(sure_magnitude, total))
  html << '</tr>'
  html << '<tr>'
  html << seq_status('Putative')
  html.concat(sub_row(putative_magnitude, total))
  html << '</tr>'
  html
end
604
-
605
# Returns a left-aligned table cell containing the +status+ label.
def seq_status(status)
  '<td align="left">' + status + '</td>'
end
609
-
610
# Returns the two right-aligned cells of a row: the magnitude and its
# percentage of +total+ with two decimals.
#
# magnitude - Value to display; must be numeric unless +total+ is nil.
# total     - Reference value for the percentage, or nil.
#
# The percentage is shown as '-' when +total+ is nil or when the division has
# no finite result. Previously only 0/0 (NaN) was handled: a non-zero
# magnitude over a zero total produced Infinity, which made the number
# formatting raise ArgumentError.
def sub_row(magnitude, total)
  percentage = '-'
  unless total.nil?
    perc_float = magnitude * 100.0 / total
    percentage = format('%.2f', perc_float) + '%' if perc_float.finite?
  end
  [
    '<td align="right">' + magnitude.to_s + '</td>',
    '<td align="right">' + percentage + '</td>'
  ]
end
627
-
628
# Returns the lines of one row whose label cell spans the two leading
# columns, followed by the magnitude and percentage cells from sub_row.
def composed_single_row(type, magnitude, total)
  ['<tr>', '<td colspan="2" align="left">' + type + '</td>'] + sub_row(magnitude, total) + ['</tr>']
end
636
-
637
-
638
# Writes every entry of +html+ to +html_file+, one per puts call, in order.
def write_array_html(html, html_file)
  html.collect do |row|
    html_file.puts row
  end
end
641
- end