RubyGems - full_lengther_next - Versions diffs - 0.0.8 → 0.5.6 - Mend

full_lengther_next 0.0.8 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

data/.gemtest +0 -0
data/History.txt +2 -2
data/Manifest.txt +33 -18
data/Rakefile +4 -2
data/bin/download_fln_dbs.rb +310 -158
data/bin/full_lengther_next +160 -103
data/bin/make_test_dataset.rb +236 -0
data/bin/make_user_db.rb +101 -117
data/bin/plot_fln.rb +270 -0
data/bin/plot_taxonomy.rb +70 -0
data/lib/expresscanvas.zip +0 -0
data/lib/full_lengther_next.rb +3 -3
data/lib/full_lengther_next/classes/artifacts.rb +66 -0
data/lib/full_lengther_next/classes/blast_functions.rb +326 -0
data/lib/full_lengther_next/classes/cdhit.rb +154 -0
data/lib/full_lengther_next/classes/chimeric_seqs.rb +315 -57
data/lib/full_lengther_next/classes/common_functions.rb +105 -63
data/lib/full_lengther_next/classes/exonerate_result.rb +258 -0
data/lib/full_lengther_next/classes/fl_analysis.rb +226 -617
data/lib/full_lengther_next/classes/fl_string_utils.rb +4 -2
data/lib/full_lengther_next/classes/fln_stats.rb +598 -557
data/lib/full_lengther_next/classes/handle_db.rb +30 -0
data/lib/full_lengther_next/classes/my_worker.rb +308 -138
data/lib/full_lengther_next/classes/my_worker_EST.rb +54 -0
data/lib/full_lengther_next/classes/my_worker_manager_EST.rb +69 -0
data/lib/full_lengther_next/classes/my_worker_manager_fln.rb +389 -0
data/lib/full_lengther_next/classes/nc_rna.rb +5 -7
data/lib/full_lengther_next/classes/reptrans.rb +210 -0
data/lib/full_lengther_next/classes/sequence.rb +439 -80
data/lib/full_lengther_next/classes/test_code.rb +15 -16
data/lib/full_lengther_next/classes/types.rb +12 -0
data/lib/full_lengther_next/classes/une_los_hit.rb +148 -230
data/lib/full_lengther_next/classes/warnings.rb +40 -0
metadata +207 -93
data/lib/full_lengther_next/classes/lcs.rb +0 -33
data/lib/full_lengther_next/classes/my_worker_manager.rb +0 -240

data/lib/full_lengther_next/classes/fl_string_utils.rb CHANGED

@@ -5,7 +5,7 @@ class String
 		s = self.upcase
 		a = s.split('').each_slice(3).map{|e| e.join}
-	    c={'GCT'=>'A', 'GCC'=>'A','GCA'=>'A','GCG'=>'A',
+	    c={		'GCT'=>'A','GCC'=>'A','GCA'=>'A','GCG'=>'A',
 			'CGT'=>'R','CGC'=>'R','CGA'=>'R','CGG'=>'R','AGA'=>'R','AGG'=>'R',
 			'AAT'=>'N','AAC'=>'N',
 			'GAT'=>'D','GAC'=>'D',
@@ -37,6 +37,8 @@ class String
 				else
 					c[e]||'x'
 				end
+			else
+				'x'
 			end
 		}
 		return res.compact.join
@@ -136,4 +138,4 @@ class String
 	    return self.reverse.split('').map{|e| c[e]}.join
 	end
-end
+end

data/lib/full_lengther_next/classes/fln_stats.rb CHANGED

@@ -1,600 +1,641 @@
+require 'types.rb'
 module FlnStats
-	def summary_stats
-		stats_file = File.open('fln_results/summary_stats.html', 'w')
-		size_filter1 = 200
-		size_filter2 = 500
-		# recogemos los trozos de html fijos
-		(html_head, html_st, html_uni, html_db, html_as, html_end) = html_code
-		total_seqs = 0
-		status_suma = 0
-		#recogemos los datos que necesitamos de los ficheros de resultados
-		(status_array, db_usage, seqs_number1, error_1_num, seq_uniq, complete_uniq, db_uni_500, db_uni_200, db_longest_one) = annotation_stats(size_filter1,size_filter2)
-		(tcode_array, seqs_number2, tc_uni_500, tc_uni_200, tc_longest_one) = testcode_stats(size_filter1,size_filter2)
-		(ncrna_total, nc_uni_500, nc_uni_200, nc_longest_one)=ncrna_stats(size_filter1,size_filter2)
-		(chimera_total, ch_uni_500, ch_uni_200, ch_longest_one, ch_db_usage)=chimera_stats(size_filter1,size_filter2)
-		seqs_number1 = (seqs_number1+chimera_total.to_i)
-		total_seqs = (seqs_number1 + seqs_number2 + ncrna_total.to_i)
-		uni_500 = (db_uni_500 + tc_uni_500 + nc_uni_500 + ch_uni_500)
-		uni_200 = (db_uni_200 + tc_uni_200 + nc_uni_200 + ch_uni_200)
-		longest_one = [db_longest_one, tc_longest_one, nc_longest_one, ch_longest_one].max
-		db_usage[0] += ch_db_usage[0]
-		db_usage[1] += ch_db_usage[1]
-		db_usage[2] += ch_db_usage[2]
-		stats_file.puts html_head
-		if (total_seqs.to_i > 0)
-			# imprimimos la tabla Status Report --------------------------------------------------------------------------------------------
-			stats_file.puts html_st
-			status_array.each do |status|
-				if (status[1] == 'Internal') || (status[1] == 'Misassembled')
-				stats_file.puts '				<tr>
-					<td colspan="2" align="left">'+status[1].to_s+'</td>
-					<td align="right">'+status[0].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				elsif (status[1] =~ /^Putative/)
-				stats_file.puts '				<tr>
-					<td align="left">Putative</td>
-					<td align="right">'+status[0].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				else
-					stats_file.puts '				<tr>
-					<td rowspan="2" align="left">'+status[1].to_s+'</td>
-					<td align="left">Sure</td>
-					<td align="right">'+status[0].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				end
-				status_suma += status[0]
-			end
-			# adding chimeric seqs
-			stats_file.puts '				<tr>
-					<td colspan="2" align="left">Putative chimera</td>
-					<td align="right">'+chimera_total.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*chimera_total.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				status_suma += chimera_total
-			# añadimos los coding, P.coding
-			tcode_array.each do |status|
-				if (status[1] == 'Coding')
-				stats_file.puts '				<tr>
-					<td  rowspan="2" align="left">'+status[1].to_s+'</td>
-					<td align="left">Sure</td>
-					<td align="right">'+status[0].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				elsif (status[1] == 'Putative Coding')
-				stats_file.puts '				<tr>
-					<td align="left">Putative</td>
-					<td align="right">'+status[0].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
+	def initialize_stats_hash
+		stats_hash = {}
+		stats_hash['input_seqs'] = 0
+		stats_hash['output_seqs'] = 0
+		stats_hash['failed'] = 0
+		stats_hash['sequences_>200'] = 0
+		stats_hash['sequences_>500'] = 0
+		stats_hash['longest_unigene'] = 0
+		stats_hash['good_seqs'] = 0
+		stats_hash['artifacts'] = 0
+		stats_hash['misassembled'] = 0
+		stats_hash['chimeras'] = 0
+		stats_hash['other_artifacts'] = 0
+		stats_hash['unknown'] = 0
+		stats_hash['unknown_>200'] = 0
+		stats_hash['unknown_>500'] = 0
+		stats_hash['prot_annotated'] = 0
+		stats_hash['complete'] = 0
+		stats_hash['complete_sure'] = 0
+		stats_hash['complete_putative'] = 0
+		stats_hash['n_terminal'] = 0
+		stats_hash['n_terminal_sure'] = 0
+		stats_hash['n_terminal_putative'] = 0
+		stats_hash['c_terminal'] = 0
+		stats_hash['c_terminal_sure'] = 0
+		stats_hash['c_terminal_putative'] = 0
+		stats_hash['internal'] = 0
+		stats_hash['swissprot'] = 0
+		stats_hash['trembl'] = 0
+		stats_hash['userdb'] = 0
+		stats_hash['ncrna'] = 0
+		stats_hash['coding'] = 0
+		stats_hash['coding_sure'] = 0
+		stats_hash['coding_putative'] = 0
+		stats_hash['coding_>200'] = 0
+		stats_hash['coding_>500'] = 0
+		stats_hash['different_orthologues'] = 0
+		stats_hash['different_completes'] = 0
+		stats_hash['BA_index'] = 0
+		return stats_hash
+	end
+	def get_taxonomy(name, taxonomy)
+		organism = nil
+		if name.include?('OS=')
+			fields = name.split('OS=',2)
+			organism = fields.last.split(' GN=').first.strip
+		elsif name[0..2] = 'sp=' || name[0..2] = 'tr='
+			name =~ /(\w+ \w+) \(([\w ]+)\) \(([\w ]+)\)/
+			if !$1.nil?
+				organism = $1
+			else
+				name =~ /(\w+ \w+) \(([\w ]+)\)/
+				if !$1.nil?
+					organism = $1
 				end
-				status_suma += status[0]
 			end
-			# se ponen los ncRNA
-			stats_file.puts '				<tr>
-					<td colspan="2" align="left">Putative ncRNA</td>
-					<td align="right">'+ncrna_total.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*ncrna_total.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				status_suma += ncrna_total
-			# se ponen los unknown
-			tcode_array.each do |status|
-				if (status[1] =~ /Unknown/i)
-				stats_file.puts '				<tr>
-					<td colspan="2" align="left">'+status[1].to_s+'</td>
-					<td align="right">'+status[0].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				end
+		else
+			organism = name.split(";",2).last
+			organism = organism.split('.', 2).first
+			organism.gsub!(/\(\D+\)/,'')
+			if organism.split(' ').length > 1
+				organism.gsub!('.','')
+				organism.gsub!(/^ /,'')
+				organism.gsub!('  ','')
+				organism.strip!
 			end
-			#se añade el total
-				stats_file.puts '				<tr>
-					<td colspan="2" align="left">Total</td>
-					<td align="right">'+status_suma.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*status_suma.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>
-			</table>'
-			# imprimimos la tabla Unigene Report --------------------------------------------------------------------------------------------
-			new_genes = tcode_array[0][0] + tcode_array[1][0]
-			total_uni = (seqs_number1 + new_genes + ncrna_total + tcode_array[2][0])
-			stats_file.puts html_uni
-				stats_file.puts '				<tr>
-					<td align="left">With orthologue in DBs</td>
-					<td align="right">'+seqs_number1.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*seqs_number1.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">Putative New Genes</td>
-					<td align="right">'+new_genes.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*new_genes.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">ncRNAs</td>
-					<td align="right">'+ncrna_total.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*ncrna_total.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">Unknown</td>
-					<td align="right">'+tcode_array[2][0].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*tcode_array[2][0].to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">Total</td>
-					<td align="right">'+total_uni.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*total_uni.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>
-			</table>'
+		end
+		if !organism.nil?
+			organism = organism.split(' ')[0..1].join(' ')
+			if taxonomy[organism].nil?
+				taxonomy[organism] = 1
+			else
+				taxonomy[organism] += 1
+			end
+		end
+	end
-			# imprimimos la tabla Database Usage --------------------------------------------------------------------------------------------
-			stats_file.puts html_db
-			db_names=["UserDB", "SwissProt", "TrEMBL"]
-			total_db = 0
-			for i in 0..db_usage.length-1 do i
-				total_db += db_usage[i]
-				stats_file.puts '				<tr>
-					<td align="left">'+db_names[i].to_s+'</td>
-					<td align="right">'+db_usage[i].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*db_usage[i].to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-			end
-			no_db = seqs_number2 + ncrna_total.to_i
-				stats_file.puts '				<tr>
-					<td align="left">None</td>
-					<td align="right">'+no_db.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*no_db.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-			total_db += no_db
-				stats_file.puts '				<tr>
-					<td align="left">Total</td>
-					<td align="right">'+total_db.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*total_db.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>
-			</table>'
-			# imprimimos la tabla Report guiding assembly quality -------------------------------------------------------------
-			stats_file.puts html_as
-				stats_file.puts '				<tr>
-					<td align="left">Unigenes</td>
-					<td align="right">'+total_seqs.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*total_seqs.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">Unigenes >'+size_filter2.to_s+'pb</td>
-					<td align="right">'+uni_500.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*uni_500.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">Unigenes >'+size_filter1.to_s+'pb</td>
-					<td align="right">'+uni_200.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*uni_200.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">Longest unigene</td>
-					<td align="right">'+longest_one.to_s+'</td>
-					<td align="right">-</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">With orthologue <sup>1</sup></td>
-					<td align="right">'+seqs_number1.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*seqs_number1.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-			if (seqs_number1.to_i > 0)
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Different orthologue IDs</td>
-					<td align="right">'+seq_uniq.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*seq_uniq.to_f/seqs_number1.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Complete transcripts</td>
-					<td align="right">'+status_array[0][0].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*status_array[0][0].to_f/seqs_number1.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Different complete transcripts</td>
-					<td align="right">'+complete_uniq.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*complete_uniq.to_f/seqs_number1.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Misassembled</td>
-					<td align="right">'+error_1_num.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*error_1_num.to_f/seqs_number1.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Putative chimera</td>
-					<td align="right">'+chimera_total.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*chimera_total.to_f/seqs_number1.to_f).to_s+' %</td>
-				</tr>'
-			end
-				stats_file.puts '				<tr>
-					<td align="left">Without orthologue <sup>1</sup></td>
-					<td align="right">'+no_db.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*seqs_number2.to_f/total_seqs.to_f).to_s+' %</td>
-				</tr>'
-			if (no_db.to_i > 0) && (seqs_number2.to_i > 0)
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Coding (all)</td>
-					<td align="right">'+tcode_array[0][0].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*tcode_array[0][0].to_f/no_db.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Coding > '+size_filter1.to_s+'bp</td>
-					<td align="right">'+tcode_array[0][2].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*tcode_array[0][2].to_f/no_db.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Coding > '+size_filter2.to_s+'bp</td>
-					<td align="right">'+tcode_array[0][3].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*tcode_array[0][3].to_f/no_db.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Putative Coding (all)</td>
-					<td align="right">'+tcode_array[1][0].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*tcode_array[1][0].to_f/no_db.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Putative Coding > '+size_filter1.to_s+'bp</td>
-					<td align="right">'+tcode_array[1][2].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*tcode_array[1][2].to_f/no_db.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Putative Coding > '+size_filter2.to_s+'bp</td>
-					<td align="right">'+tcode_array[1][3].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*tcode_array[1][3].to_f/no_db.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Putative ncRNA</td>
-					<td align="right">'+ncrna_total.to_s+'</td>
-					<td align="right">'+'%.2f' % (100*ncrna_total.to_f/no_db.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Unknown (all)</td>
-					<td align="right">'+tcode_array[2][0].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*tcode_array[2][0].to_f/no_db.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Unknown > '+size_filter1.to_s+'bp</td>
-					<td align="right">'+tcode_array[2][2].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*tcode_array[2][2].to_f/no_db.to_f).to_s+' %</td>
-				</tr>'
-				stats_file.puts '				<tr>
-					<td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Unknown > '+size_filter2.to_s+'bp</td>
-					<td align="right">'+tcode_array[2][3].to_s+'</td>
-					<td align="right">'+'%.2f' % (100*tcode_array[2][3].to_f/no_db.to_f).to_s+' %</td>
-				</tr>'
-			end
-			stats_file.puts '			</table>
-			<sup>1</sup> Percents for subclassifications of this category were calculated using this line as 100% reference.'
+	def initialize_stats_hash_reptrans
+		stats_hash = {}
+		stats_hash['prot_annotated'] = 0
+		stats_hash['est_annotated'] = 0
+		stats_hash['coding_>1'] = 0
+		stats_hash['coding_>0.94'] = 0
+		stats_hash['coding_>0.84'] = 0
+		stats_hash['coding_>0.73'] = 0
+		stats_hash['coding_>0'] = 0
+		return stats_hash
+	end
+	def summary_stats(seqs, stats_hash, diff_ids_array, diff_ids_complete_array)
+		low_limit = 200
+		upper_limit = 500
+		#All seqs
+		#-----------
+		stats_hash['output_seqs'] += seqs.length
+		good_seqs = seqs.select{|s| s.type >= UNKNOWN}
+		stats_hash['good_seqs'] += good_seqs.length
+		#Longest_unigene
+		current_longest_unigene = seqs.map{|s| s.fasta_length}.max
+		if current_longest_unigene > stats_hash['longest_unigene']
+			stats_hash['longest_unigene'] = current_longest_unigene
 		end
-		stats_file.puts html_end
+		#Load ids
+		seqs.map{|s|
+			if s.type > UNKNOWN && s.type < NCRNA
+				diff_ids_array << s.hit.acc
+			end}
+		diff_ids_array.uniq!
+		#By Length
+		stats_hash['sequences_>200'] += good_seqs.select{|s| s.fasta_length > low_limit}.length
+		stats_hash['sequences_>500'] += good_seqs.select{|s| s.fasta_length > upper_limit}.length
+		stats_hash['failed'] += seqs.select{|s| s.type == FAILED}.length
+		#Unknown
+		#-----------------------------
+		all_unknown = seqs.select{|s| s.type == UNKNOWN}
+		stats_hash['unknown'] += all_unknown.length
+		#By Length
+		stats_hash['unknown_>200'] += all_unknown.select{|s| s.fasta_length > low_limit}.length
+		stats_hash['unknown_>500'] += all_unknown.select{|s| s.fasta_length > upper_limit}.length
+		#Artifacts
+		#----------------
+		stats_hash['artifacts'] += seqs.select{|s| s.type < UNKNOWN && s.type > FAILED}.length
+		stats_hash['misassembled'] += seqs.select{|s| s.type == MISASSEMBLED}.length
+		stats_hash['chimeras'] += seqs.select{|s| s.type == CHIMERA && !s.seq_name.include?('_split_')}.length # We don't want count a multiple chimera
+		stats_hash['other_artifacts'] += seqs.select{|s| s.type == OTHER}.length
-		stats_file.close
+		#Annotated with prot
+		#---------------------
+		prot_annotated = seqs.select{|s| s.type >= COMPLETE && s.type <= INTERNAL}
+		stats_hash['prot_annotated'] += prot_annotated.length
+		#By annotation
+		stats_hash['internal'] += seqs.select{|s| s.type == INTERNAL}.length
+		complete = seqs.select{|s| s.type == COMPLETE}
+		n_terminal = seqs.select{|s| s.type == N_TERMINAL}
+		c_terminal = seqs.select{|s| s.type == C_TERMINAL}
+		stats_hash['complete'] += complete.length
+		stats_hash['n_terminal'] += n_terminal.length
+		stats_hash['c_terminal'] += c_terminal.length
+		#Load  complete ids
+		complete.map{|s| diff_ids_complete_array << s.hit.acc}
+		diff_ids_complete_array.uniq!
+		#----> By Status
+		stats_hash['complete_sure'] += complete.select{|s| s.status}.length
+		stats_hash['n_terminal_sure'] += n_terminal.select{|s| s.status}.length
+		stats_hash['c_terminal_sure'] += c_terminal.select{|s| s.status}.length
+		stats_hash['complete_putative'] += complete.select{|s| !s.status}.length
+		stats_hash['n_terminal_putative'] += n_terminal.select{|s| !s.status}.length
+		stats_hash['c_terminal_putative'] += c_terminal.select{|s| !s.status}.length
+		#By database
+		swissprot = prot_annotated.select{|s| s.db_name =~ /^sp_/}.length
+		trembl = prot_annotated.select{|s| s.db_name =~ /^tr_/}.length
+		stats_hash['swissprot'] += swissprot
+		stats_hash['trembl'] += trembl
+		stats_hash['userdb'] += prot_annotated.length - swissprot - trembl
+		#ncRNA
+		#----------------
+		stats_hash['ncrna'] += seqs.select{|s| s.type == NCRNA}.length
+		#Coding sequences
+		#----------------
+		coding = seqs.select{|s| s.type == CODING}
+		stats_hash['coding'] += coding.length
+		#By Status
+		stats_hash['coding_sure'] += coding.select{|s| s.status}.length
+		stats_hash['coding_putative'] += coding.select{|s| !s.status}.length
+		#By Length
+		stats_hash['coding_>200'] += coding.select{|s| s.fasta_length > low_limit}.length
+		stats_hash['coding_>500'] += coding.select{|s| s.fasta_length > upper_limit}.length
+		return stats_hash, diff_ids_array, diff_ids_complete_array
 	end
+	def last_stats(stats_hash, diff_ids_array, diff_ids_complete_array)
+		stats_hash['different_orthologues'] = diff_ids_array.length
+		stats_hash['different_completes'] = diff_ids_complete_array.length
+        #BA index
+	    if stats_hash['prot_annotated'] > 0 &&
+	    	stats_hash['complete'] > 0 &&
+	    	stats_hash['sequences_>500'] > 0 &&
+	    	stats_hash['different_orthologues'] > 0 &&
+	    	stats_hash['different_completes'] > 0
+	        coef_anot_geom = (stats_hash['prot_annotated'] * stats_hash['complete'] * 1.0)/(stats_hash['sequences_>500']*10000)
+	        coef_mejora = (stats_hash['different_orthologues']*1.0 + stats_hash['different_completes'])/(stats_hash['prot_annotated'] + stats_hash['complete'])
+	   		stats_hash['BA_index'] = Math.sqrt(coef_anot_geom*coef_mejora)
+	   	end
+		return stats_hash
+	end
+	def coding_stats_reptrans(coding_seq, stats_hash)
+		group = nil
+		if coding_seq.t_code > 1
+			group = 'coding_>1'
+		elsif coding_seq.t_code > 0.95
+			group = 'coding_>0.94'
+		elsif coding_seq.t_code > 0.85
+			group = 'coding_>0.84'
+		elsif coding_seq.t_code > 0.73
+			group = 'coding_>0.73'
+		elsif coding_seq.t_code > 0
+			group = 'coding_>0'
+		end
+		if !group.nil?
+			stats_hash[group] += 1
+		end
+	end
+	def write_summary_stats(stats_hash, stats_taxonomy, diff_ids_array, diff_ids_complete_array, txt_file, html_file)
+		stats_hash = last_stats(stats_hash, diff_ids_array, diff_ids_complete_array)
+		write_txt(stats_hash, txt_file)
+		write_html(stats_hash, html_file, stats_taxonomy)
+	end
-		def html_code
-			html_head = '<html>
-		<head>
-			<title>FLN Summary</title>
-		</head>
-		<body bgcolor="#FFFFFF">
-			<center>
-				<h1 align="center">
-					Full-LengtherNEXT Summary
-				</h1>'
-			html_1 = '
-				<h2 align="center">
-					Status report
-				</h2>
-				<table border="2" cellspacing="0" cellpadding="2">
-					<tr>
-						<th colspan="2">Status</th>
-						<th>Unigenes</th>
-						<th>%</th>
-					</tr>'
-			html_2= '
-				<h2 align="center">
-					Unigene report
-				</h2>
-				<table border="2" cellspacing="0" cellpadding="2">
-					<tr>
-						<th></th>
-						<th>Unigenes</th>
-						<th>%</th>
-					</tr>'
-			html_3= '
-				<h2 align="center">
-					Database usage
-				</h2>
-				<table border="2" cellspacing="0" cellpadding="2">
-					<tr>
-						<th></th>
-						<th>Unigenes</th>
-						<th>%</th>
-					</tr>'
-			html_4= '
-				<h2 align="center">
-					Report guiding assembly quality
-				</h2>
-				<table border="2" cellspacing="0" cellpadding="2">
-					<tr>
-						<th></th>
-						<th>Unigenes</th>
-						<th>%</th>
-					</tr>'
-			html_5 = '	</body>
-	</html>'
-			return [html_head, html_1, html_2, html_3, html_4, html_5]
+	def write_reptrans_stats(stats_hash, html_file, txt_file)
+		html = File.open(html_file,'w')
+		txt = File.open(txt_file,'w')
+		write_txt(stats_hash, txt)
+		write_html_reptrans(stats_hash, html)
+	end
+	def write_html_reptrans(stats_hash, html_file)
+		html_file.puts '<html>'
+		header(html_file)
+		body_reptrans(html_file, stats_hash)
+		html_file.puts '</html>'
+	end
+	def write_txt(stats_hash, file)
+		stats_hash.each do |key, value|
+			file.puts "#{value}\t#{key}"
 		end
+	end
+	def write_html(stats_hash, html_file, stats_taxonomy)
+		js_path = File.dirname(html_file.to_path)
+		system("unzip -qq #{File.join(File.dirname(__FILE__), '..', '..', 'expresscanvas.zip')} -d #{js_path}") if !File.exists?(File.join(js_path, 'expresscanvas'))
+		html_file.puts '<html>'
+		html_header(html_file, stats_hash, stats_taxonomy)
+		body(html_file, stats_hash)
+		html_file.puts '</html>'
+	end
-		def annotation_stats(size_filter1,size_filter2)
-			seqs_number = 0
-			array_of_all_accs = []
-			array_of_complete_accs = []
-			error_1_num = 0
-			uni_500 = 0
-			uni_200 = 0
-			longest_one = 0
-			status_array = []
-			# total, status
-			complete = [0,'Complete']
-			putative_complete = [0,'Putative Complete']
-			c_terminus = [0,'C-terminus']
-			putative_c_terminus = [0,'Putative C-terminus']
-			n_terminus = [0,'N-terminus']
-			putative_n_terminus = [0,'Putative N-terminus']
-			internal = [0,'Internal']
-			cod_seq = [0,'Misassembled']
-			#userdb, SwissProt, TrEMBL
-			db_usage = [0,0,0]
-			File.open('fln_results/dbannotated.txt').each do |line|
-				line.chomp!
-				(name,fasta_length,acc,db_name,status,kk1,kk2,kk3,kk4,kk5,msgs) = line.split("\t")
-				if (line !~ /^Query_id\t/) && (!line.empty?)
-					seqs_number += 1
-					if (fasta_length.to_i > longest_one)
-						longest_one = fasta_length.to_i
-					end
-					array_of_all_accs.push acc
-					if (db_name !~ /^sp_/) && (db_name !~ /^tr_/)
-						db_usage[0] += 1
-					elsif (db_name =~ /^sp_/)
-						db_usage[1] += 1
-					elsif (db_name =~ /^tr_/)
-						db_usage[2] += 1
-					end
-					# -------------------------------------------------------------------------
-					if (fasta_length.to_i >= size_filter1)
-						uni_200 += 1
-					end
-					if (fasta_length.to_i >= size_filter2)
-						uni_500 += 1
-					end
-					# -------------------------------------------------------------------------
-					if (msgs =~ /ERROR#1/)
-						error_1_num += 1
-					end
-					# -------------------------------------------------------------------------
-					if (status == 'Complete')
-						complete[0] += 1
-						array_of_complete_accs.push acc
-					elsif (status == 'Putative Complete')
-						putative_complete[0] += 1
-					elsif (status == 'C-terminus')
-						c_terminus[0] += 1
-					elsif (status == 'N-terminus')
-						n_terminus[0] += 1
-					elsif (status == 'Putative C-terminus')
-						putative_c_terminus[0] += 1
-					elsif (status == 'Putative N-terminus')
-						putative_n_terminus[0] += 1
-					elsif (status == 'Internal')
-						internal[0] += 1
-					elsif (status == 'Misassembled')
-						cod_seq[0] += 1
-					end
-					# -------------------------------------------------------------------------
-				end
+	def header(html_file)
+		html_file.puts 	'<head>',
+						'<title>FLN Summary</title>',
+						'</head>'
+	end
-			end
+	# Writes the <head> section of the FLN summary on html_file, including the
+	# JavaScript function showDemo that draws two CanvasXpress charts:
+	#   * 'profile':  stacked "Sure"/"Putative" percentages for the categories
+	#                 Unknown, Complete, N-terminal, C-terminal, Internal, ncrna
+	#                 and Coding, each count expressed as a percentage of
+	#                 stats_hash['good_seqs'];
+	#   * 'taxonomy': the 21 most frequent entries of stats_taxonomy
+	#                 (sorted by descending count).
+	#
+	# stats_taxonomy must respond to to_a yielding [name, count] pairs.
+	# NOTE(review): the "Sure" series uses stats_hash['coding'] while the other
+	# split categories use their '*_sure' counter — confirm this is intended.
+	# NOTE(review): the percentages divide by stats_hash['good_seqs'] without a
+	# zero check — confirm callers never reach this with no good sequences.
+	def html_header(html_file, stats_hash, stats_taxonomy)
+    	structural_data_sure = []
+    	structural_data_sure << stats_hash['unknown']
+    	structural_data_sure << stats_hash['complete_sure']
+    	structural_data_sure << stats_hash['n_terminal_sure']
+    	structural_data_sure << stats_hash['c_terminal_sure']
+    	structural_data_sure << stats_hash['internal']
+    	structural_data_sure << stats_hash['ncrna']
+    	structural_data_sure << stats_hash['coding']
+    	structural_data_putative = []
+    	structural_data_putative << 0
+    	structural_data_putative << stats_hash['complete_putative']
+    	structural_data_putative << stats_hash['n_terminal_putative']
+    	structural_data_putative << stats_hash['c_terminal_putative']
+    	structural_data_putative << 0
+    	structural_data_putative << 0
+    	structural_data_putative << stats_hash['coding_putative']
+    	values_structural_sure = "[#{structural_data_sure.map{|stat| stat*100.0/stats_hash['good_seqs']}.join(', ')}]"
+    	values_structural_putative = "[#{structural_data_putative.map{|stat| stat*100.0/stats_hash['good_seqs']}.join(', ')}]"
+    	data = stats_taxonomy.to_a.sort{|s2, s1| s1.last <=> s2.last}[0..20]
+    	smps_taxonomy = "['#{data.map{|tax| tax.first}.join("', '")}']"
+    	values_taxonomy = "[#{data.map{|tax| tax.last}.join(', ')}]"
+		html_file.puts 	'<head>
+			<title>FLN Summary</title>
+			<meta http-equiv="CACHE-CONTROL" CONTENT="NO-CACHE">
+			<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-			status_array = [complete, putative_complete, c_terminus, putative_c_terminus, n_terminus, putative_n_terminus, internal, cod_seq]
+    		<!--[if lt IE 9]><script type="text/javascript" src="./expresscanvas/js/flashcanvas.js"></script><![endif]-->
+    		<script type="text/javascript" src="./expresscanvas/js/canvasXpress.min.js"></script>
-			return [status_array, db_usage, seqs_number, error_1_num, array_of_all_accs.uniq.count, array_of_complete_accs.uniq.count, uni_500, uni_200, longest_one]
-		end
+    		<script id=\'demoScript\'>
+    			var showDemo = function () {'
+		#'smpTitle': 'Status',
+		html_file.puts "new CanvasXpress('profile',
+					  {
+						'y' : {
+						  'vars' : ['Sure', 'Putative'],
+						  'smps' : ['Unknown', 'Complete', 'N-terminal', 'C-terminal', 'Internal', 'ncrna', 'Coding'],
+						  'data' : [#{values_structural_sure},
+						  			#{values_structural_putative}],
+						},
+						'a' : {
+						  'xAxis' : ['Sure', 'Putative']
+						},
+					  },
+					  {'gradient': false,
+					  'toolbarPermanent': true,
+					  'graphOrientation': 'vertical',
+					  'graphType': 'Stacked',
+					  'legendBackgroundColor': false,
+					  'smpLabelScaleFontFactor': 0.8,
+					  'xAxisTitle': '% sequences',
+					  'xAxis2Show': false,
+					  'xAxisExact': true,
+					  'setMaxX': 80,
+					  'setMinX': 0,
+					  'axisTitleScaleFontFactor': 2,
+					  'smpTitleFontStyle': 'italic',
+					  'titleHeight': 60
+					  }
+					);
+					new CanvasXpress('taxonomy',
+					  {
+						'y' : {
+						  'vars' : ['Annotations'],
+						  'smps' : #{smps_taxonomy},
+						  'data' : [#{values_taxonomy}],
+						},
+						'a' : {
+						  'xAxis' : ['Sure', 'Putative']
+						},
+					  },
+					  {'gradient': false,
+					  'toolbarPermanent': true,
+					  'graphOrientation': 'horizontal',
+					  'showLegend': false,
+					  'smpLabelScaleFontFactor': 1.5,
+					  'xAxisTitle': 'Number of sequences',
+					  'xAxis2Show': false,
+					  'titleHeight': 60
+					  }
+					);
+				}
+   			</script>
+		</head>"
+	end
-		def testcode_stats(size_filter1,size_filter2)
-			seqs_number = 0
-			uni_500 = 0
-			uni_200 = 0
-			longest_one = 0
-			# total, status
-			coding_stats = [0,'Coding',0,0]
-			p_coding_stats = [0,'Putative Coding',0,0]
-			unknown_stats = [0,'Unknown',0,0]
-			File.open('fln_results/new_coding.txt').each do |line|
-				line.chomp!
-				(name,fasta_length,acc,db_name,status) = line.split("\t")
-				if (line !~ /^Query_id\t/) && (!line.empty?)
-					seqs_number += 1
-					if (fasta_length.to_i > longest_one)
-						longest_one = fasta_length.to_i
-					end
-					# -------------------------------------------------------------------------
-					if (fasta_length.to_i >= size_filter1)
-						uni_200 += 1
-					end
-					if (fasta_length.to_i >= size_filter2)
-						uni_500 += 1
-					end
-					# -------------------------------------------------------------------------
-					if (fasta_length.to_i > size_filter1)
-						if (status == 'coding')
-							coding_stats[2] += 1
-						elsif (status == 'putative_coding')
-							p_coding_stats[2] += 1
-						elsif (status == 'unknown')
-							unknown_stats[2] += 1
-						end
-					end
-					if (fasta_length.to_i > size_filter2)
-						if (status == 'coding')
-							coding_stats[3] += 1
-						elsif (status == 'putative_coding')
-							p_coding_stats[3] += 1
-						elsif (status == 'unknown')
-							unknown_stats[3] += 1
-						end
-					end
-					if (status == 'coding')
-						coding_stats[0] += 1
-					elsif (status == 'putative_coding')
-						p_coding_stats[0] += 1
-					elsif (status == 'unknown')
-						unknown_stats[0] += 1
-					end
+	# Writes the <body> of the representative-transcriptome summary page:
+	# a title followed by the two reptrans tables inside one container div.
+	#
+	# html_file:: object receiving the markup through +puts+.
+	# stats_hash:: counters handed unchanged to both table builders.
+	def body_reptrans(html_file, stats_hash)
+		html_file.puts '<body bgcolor="#FFFFFF" >', '<center>' # Start body
+		html_file.puts '<div style="float:center; font-size:30; margin:10px"><b>', 'Full-LengtherNEXT Representative Transcriptome Summary', '</b></div>'
+		# TABLES
+		html_file.puts '<div style=" width: 850px; height: 350px; padding: 10 ">'
+		reptrans_report(html_file, stats_hash, 'left')
+		# The alignment is pasted verbatim into a CSS "float:" declaration,
+		# so it has to be the valid keyword 'right'.
+		reptrans_acumulative_report(html_file, stats_hash, 'right')
+		html_file.puts '</div>'
+		# END TABLES
+		html_file.puts '</center>', '</body>' # End body
+	end
-				end
+	# Writes the <body> of the main summary page: a title and three rows,
+	# each row being a fixed-size div holding a left and a right panel.
+	#
+	# html_file:: object receiving the markup through +puts+.
+	# stats_hash:: counters handed unchanged to the report builders.
+	def body(html_file, stats_hash)
+		# NOTE(review): "id=demo" sits inside the onload script rather than
+		# being an HTML attribute - confirm whether that is intended.
+		html_file.puts '<body bgcolor="#FFFFFF" onload="showDemo(); id=demo">', '<center>' # Start body
+		html_file.puts '<div style="float:center; font-size:30; margin:10px"><b>', 'Full-LengtherNEXT Summary', '</b></div>'
+		# TABLES
+		html_file.puts '<div style="overflow: hidden; width: 950px; height: 550px; padding: 10 ">'
+		general_report(html_file, stats_hash, 'left')
+		assembly_report(html_file, stats_hash, 'right')
+		html_file.puts '</div>'
+		html_file.puts '<div style="overflow: hidden; width: 950px; height: 550px; padding: 10 ">'
+		status_graph(html_file, 'left')
+		# Alignments end up in a CSS "float:" declaration, so they must be
+		# spelled 'right' to be honoured by the browser.
+		status_report(html_file, stats_hash, 'right')
+		html_file.puts '</div>'
+		html_file.puts '<div style="overflow: hidden; width: 950px; height: 750px; padding: 10 ">'
+		taxonomy_graph(html_file, 'left')
+		database_report(html_file, stats_hash, 'right')
+		html_file.puts '</div>'
+		# END TABLES
+		html_file.puts '</center>', '</body>' # End body
+	end
-			end
-			status_array = [coding_stats, p_coding_stats, unknown_stats]
-			return [status_array, seqs_number, uni_500, uni_200, longest_one]
-		end
+	# Emits the 'Sequences info' table of the reptrans summary.
+	# Every row is expressed as a share of the sum of all values held in
+	# stats_hash.
+	#
+	# html_file:: object receiving the markup through +puts+.
+	# stats_hash:: counters keyed by category name.
+	# align:: value written into the CSS float of the wrapping div.
+	def reptrans_report(html_file, stats_hash, align)
+		grand_total = stats_hash.values.inject(0) { |acc, count| acc + count }
+		rows = [
+			['Output', grand_total],
+			['Annotated with protein', stats_hash['prot_annotated']],
+			['Annotated with EST', stats_hash['est_annotated']],
+			['Coding test-code > 1', stats_hash['coding_>1']],
+			['Coding test-code > 0.94', stats_hash['coding_>0.94']],
+			['Coding test-code > 0.84', stats_hash['coding_>0.84']],
+			['Coding test-code > 0.73', stats_hash['coding_>0.73']],
+			['Coding test-code > 0', stats_hash['coding_>0']]
+		]
+		html = ['<div style=" margin: 0; float:'+align+'">']
+		html << table_title('Sequences info')
+		html.concat(table_header(['', 'Sequences', '%'], 0))
+		rows.each do |label, count|
+			html.concat(single_row(label, count, grand_total))
+		end
+		html << '</table>' << '</div>'
+		write_array_html(html, html_file)
+	end
+	# Emits the cumulative 'Sequences summary' table of the reptrans page:
+	# each row shows its own counter plus all counters of the rows above,
+	# as a share of the sum of all values held in stats_hash.
+	#
+	# html_file:: object receiving the markup through +puts+.
+	# stats_hash:: counters keyed by category name.
+	# align:: value written into the CSS float of the wrapping div.
+	def reptrans_acumulative_report(html_file, stats_hash, align)
+		grand_total = stats_hash.values.inject(0) { |acc, count| acc + count }
+		html = ['<div style=" margin: 0; float:'+align+'">']
+		html << table_title('Sequences summary (Acumulative)')
+		html.concat(table_header(['', 'Sequences', '%'], 0))
+		running = 0
+		[
+			['Annotated with protein', 'prot_annotated'],
+			['Annotated with EST', 'est_annotated'],
+			['Coding test-code > 1', 'coding_>1'],
+			['Coding test-code > 0.94', 'coding_>0.94'],
+			['Coding test-code > 0.84', 'coding_>0.84'],
+			['Coding test-code > 0.73', 'coding_>0.73']
+		].each do |label, key|
+			running = stats_hash[key] + running
+			html.concat(single_row(label, running, grand_total))
+		end
+		html << '</table>' << '</div>'
+		write_array_html(html, html_file)
+	end
+	# Emits the 'General info' table: input size, failures, artifacts and
+	# their breakdown, and the sequences that remain.
+	# The three artifact sub-rows are indented and use the artifact count
+	# as their 100% reference. The 'BA index' row is only added when the
+	# stored index is greater than zero.
+	#
+	# html_file:: object receiving the markup through +puts+.
+	# stats_hash:: counters keyed by category name.
+	# align:: value written into the CSS float of the wrapping div.
+	def general_report(html_file, stats_hash, align)
+		html = []
+		html << '<div  style="margin: 0; float:'+align+'">'
+		html << table_title('General info')
+		html.concat(table_header(['', 'Sequences', '%'], 0))
+		html.concat(single_row('Input', stats_hash['input_seqs'], stats_hash['input_seqs']))
+		html.concat(single_row('Failing sequences', stats_hash['failed'], stats_hash['output_seqs']))
+		html.concat(single_row('Artifacts <sup>1</sup>', stats_hash['artifacts'], stats_hash['output_seqs']))
+		# Literal true: the TRUE constant is deprecated since Ruby 2.4 and no
+		# longer defined in Ruby 3.
+		html.concat(single_row('Misassembled', stats_hash['misassembled'], stats_hash['artifacts'], true))
+		html.concat(single_row('Chimeras', stats_hash['chimeras'], stats_hash['artifacts'], true))
+		html.concat(single_row('Other', stats_hash['other_artifacts'], stats_hash['artifacts'], true))
+		html.concat(single_row('Sequences with resolved chimeras', stats_hash['output_seqs'], stats_hash['input_seqs']))
+		html.concat(single_row('Sequences without artifacts', stats_hash['good_seqs'], stats_hash['output_seqs']))
+		html.concat(single_row('BA index', "%5.2f" % [stats_hash['BA_index']], nil)) if stats_hash['BA_index'] > 0
+		html << '</table>'
+		html << '</div>'
+		write_array_html(html, html_file)
+	end
-		def ncrna_stats(size_filter1,size_filter2)
+	# Writes the placeholder for the taxonomy chart: a floated div holding
+	# a titled one-cell table with an empty 540x640 <canvas id='taxonomy'>.
+	# The id matches the CanvasXpress('taxonomy', ...) object created by the
+	# script emitted in the page head.
+	#
+	# html_file:: object receiving the markup through +puts+.
+	# align:: value written into the CSS float of the wrapping div.
+	def taxonomy_graph(html_file, align)
+		html_file.puts '<div style=\'float:'+align+'\'>'
+		html_file.puts table_title('Taxonomy distribution on annotations')
+		html_file.puts '<table >
+				<tr>
+					<td>
+						<canvas id=\'taxonomy\' width=\'540\' height=\'640\'></canvas>
+					</td>
+				</tr>
+			</table>
+		</div>'
+	end
-			uni_500 = 0
-			uni_200 = 0
-			nc_total = 0
-			longest_one = 0
-			File.open('fln_results/nc_rnas.txt').each do |line|
-				line.chomp!
-				(name,fasta_length,acc,db_name,status) = line.split("\t")
+	# Emits the 'Database usage' table: how many sequences were annotated
+	# by each database, as a share of stats_hash['good_seqs']. The 'None'
+	# row is the sum of the 'coding' and 'unknown' counters.
+	#
+	# html_file:: object receiving the markup through +puts+.
+	# stats_hash:: counters keyed by category name.
+	# align:: value written into the CSS float of the wrapping div.
+	def database_report(html_file, stats_hash, align)
+		html = []
+		# The ';' after "margin: 0" is required: without it the float value
+		# becomes part of the margin declaration and is not applied.
+		html << '<div style=" margin: 0; float:'+align+'">'
+		html << table_title('Database usage')
+		html.concat(table_header(['', 'Unigenes', '%'], 0))
+		html.concat(single_row('UserDB', stats_hash['userdb'], stats_hash['good_seqs']))
+		html.concat(single_row('SwissProt', stats_hash['swissprot'], stats_hash['good_seqs']))
+		html.concat(single_row('TrEMBL', stats_hash['trembl'], stats_hash['good_seqs']))
+		html.concat(single_row('ncRNA', stats_hash['ncrna'], stats_hash['good_seqs']))
+		html.concat(single_row('None', stats_hash['coding']+ stats_hash['unknown'], stats_hash['good_seqs']))
+		html.concat(single_row('Total', stats_hash['good_seqs'], stats_hash['good_seqs']))
+		html << '</table>'
+		html << '</div>'
+		write_array_html(html, html_file)
+	end
-				if (status == 'Putative ncRNA')
+	# Emits the 'Report guiding assembly quality' table.
+	# Top-level rows are shares of stats_hash['good_seqs']; indented
+	# sub-rows use their parent category (sequences with an orthologue, or
+	# the coding+unknown sum for those without) as the 100% reference, as
+	# the footnote written under the table states.
+	#
+	# html_file:: object receiving the markup through +puts+.
+	# stats_hash:: counters keyed by category name.
+	# align:: value written into the CSS float of the wrapping div.
+	def assembly_report(html_file, stats_hash, align)
+		html = []
+		html << '<div style=" margin: 0; float:'+align+'">'
+		html << table_title('Report guiding assembly quality')
+		html.concat(table_header(['', 'Unigenes', '%'], 0))
+		html.concat(single_row('Unigenes', stats_hash['good_seqs'], stats_hash['good_seqs']))
+		html.concat(single_row('Unigenes >500pb', stats_hash['sequences_>500'], stats_hash['good_seqs']))
+		html.concat(single_row('Unigenes >200pb', stats_hash['sequences_>200'], stats_hash['good_seqs']))
+		# nil total: the length is an absolute value, so no percentage.
+		html.concat(single_row('Longest unigene', stats_hash['longest_unigene'], nil))
+		html.concat(single_row('With orthologue <sup>1</sup>', stats_hash['prot_annotated'], stats_hash['good_seqs']))
+		# Literal true: the TRUE constant is deprecated since Ruby 2.4 and no
+		# longer defined in Ruby 3.
+		html.concat(single_row('Different orthologue IDs', stats_hash['different_orthologues'], stats_hash['prot_annotated'], true))
+		html.concat(single_row('Complete transcripts', stats_hash['complete'], stats_hash['prot_annotated'], true))
+		html.concat(single_row('Different complete transcripts ', stats_hash['different_completes'], stats_hash['prot_annotated'], true))
+		html.concat(single_row('ncRNA', stats_hash['ncrna'], stats_hash['good_seqs']))
+		without_orthologue = stats_hash['coding']+ stats_hash['unknown']
+		html.concat(single_row('Without orthologue <sup>1</sup>', without_orthologue, stats_hash['good_seqs']))
+		html.concat(single_row('Coding (all)', stats_hash['coding'], without_orthologue, true))
+		html.concat(single_row('Coding > 200bp', stats_hash['coding_>200'], without_orthologue, true))
+		html.concat(single_row('Coding > 500bp', stats_hash['coding_>500'], without_orthologue, true))
+		html.concat(single_row('Unknown (all)', stats_hash['unknown'], without_orthologue, true))
+		html.concat(single_row('Unknown > 200bp', stats_hash['unknown_>200'], without_orthologue, true))
+		html.concat(single_row('Unknown > 500bp', stats_hash['unknown_>500'], without_orthologue, true))
+		html << '</table>'
+		html << '<sup>1</sup> Percents for subclassifications of this category <br> were calculated using this line as 100% reference.'
+		html << '</div>'
+		write_array_html(html, html_file)
+	end
-					if (fasta_length.to_i > longest_one)
-						longest_one = fasta_length.to_i
-					end
-					# -------------------------------------------------------------------------
-					if (fasta_length.to_i >= size_filter1)
-						uni_200 += 1
-					end
-					if (fasta_length.to_i >= size_filter2)
-						uni_500 += 1
-					end
-					# -------------------------------------------------------------------------
+	# Writes the placeholder for the structural-profile chart: a floated
+	# div holding a titled one-cell table with an empty 500x440
+	# <canvas id='profile'>.
+	# NOTE(review): presumably filled by a CanvasXpress('profile', ...)
+	# object in the head script - confirm the id against that script.
+	#
+	# html_file:: object receiving the markup through +puts+.
+	# align:: value written into the CSS float of the wrapping div.
+	def status_graph(html_file, align)
+		html_file.puts '<div style=\'float:'+align+'\'>'
+		html_file.puts table_title('Structural profile')
+		html_file.puts '<table >
+				<tr>
+					<td>
+						<canvas id=\'profile\' width=\'500\' height=\'440\'></canvas>
+					</td>
+				</tr>
+			</table>
+		</div>'
-					nc_total += 1
+	end
-				end
+	# Emits the 'Status report' table. Categories that have Sure/Putative
+	# counters take two table rows (fused_row); the others take a single
+	# row spanning both label columns (composed_single_row). Every
+	# percentage is relative to stats_hash['good_seqs'].
+	#
+	# html_file:: object receiving the markup through +puts+.
+	# stats_hash:: counters keyed by category name.
+	# align:: value written into the CSS float of the wrapping div.
+	def status_report(html_file, stats_hash, align)
+		good = stats_hash['good_seqs']
+		html = ['<div style=" margin: 0; float:'+align+'">']
+		html << table_title('Status report')
+		html.push(*table_header(['Status', 'Unigenes', '%'], 2))
+		html.push(*fused_row('Complete', stats_hash['complete_sure'], stats_hash['complete_putative'], good))
+		html.push(*fused_row('C-terminus', stats_hash['c_terminal_sure'], stats_hash['c_terminal_putative'], good))
+		html.push(*fused_row('N-terminus', stats_hash['n_terminal_sure'], stats_hash['n_terminal_putative'], good))
+		html.push(*composed_single_row('Internal', stats_hash['internal'], good))
+		html.push(*fused_row('Coding', stats_hash['coding_sure'], stats_hash['coding_putative'], good))
+		html.push(*composed_single_row('ncRNA', stats_hash['ncrna'], good))
+		html.push(*composed_single_row('Unknown', stats_hash['unknown'], good))
+		html.push(*composed_single_row('Total', good, good))
+		html << '</table>' << '</div>'
+		write_array_html(html, html_file)
+	end
+	# Returns the bold 25px heading markup shown above a table or chart.
+	#
+	# title:: heading text (String), inserted as-is without HTML escaping.
+	def table_title(title)
+		'<div style="font-size:25px; margin: 10"><b>' + title + '</b></div>'
+	end
+	# Returns the opening markup of a bordered table plus its header row,
+	# as an array of HTML lines. The <table> tag is left open; callers
+	# append the rows and the closing </table> themselves.
+	#
+	# col_array:: header captions (Strings), one <th> per entry.
+	# colspan:: when greater than zero, the first header cell spans this
+	#           many columns; otherwise every cell is a plain <th>.
+	def table_header(col_array, colspan)
+		html = []
+		html << '<table border="2" cellspacing="0" cellpadding="2">'
+		# Table header
+		html << '<tr>'
+		col_array.each_with_index do |col,i|
+			# Only the first caption can be widened with colspan.
+			if i == 0 && colspan > 0
+				html <<	'<th colspan="'+colspan.to_s+'">'+col+'</th>'
+			else
+				html <<	'<th>'+col+'</th>'
 			end
+		end
+		html <<	'</tr>'
+		return html
+	end
-			return [nc_total, uni_500, uni_200, longest_one]
+	# Returns one table row (array of HTML lines): a left-aligned label
+	# cell followed by the count and percentage cells built by sub_row.
+	#
+	# name:: row label (String), inserted as-is.
+	# magnitude:: value shown in the count cell.
+	# total:: 100% reference for the percentage, or nil for no percentage.
+	# space:: when truthy, the label is indented with five &nbsp; to mark
+	#         it as a sub-category. Defaults to the literal false; the
+	#         FALSE constant is no longer defined in Ruby 3.
+	def single_row(name, magnitude, total, space = false)
+		if space
+			name = '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'+ name
 		end
+		html = []
+		html << '<tr>'
+		html << '<td align="left">'+name+'</td>'
+		html.concat(sub_row(magnitude, total))
+		html << '</tr>'
+		return html
+	end
-		def chimera_stats(size_filter1,size_filter2)
-			uni_500 = 0
-			uni_200 = 0
-			ch_total = 0
-			longest_one = 0
-			db_usage = [0,0,0]
-			if !File.exists?('fln_results/chimeric_sequences.txt')
-				return [0, 0, 0, longest_one, db_usage]
+	# Returns two table rows (array of HTML lines) for a category that has
+	# 'Sure' and 'Putative' counters. The category label sits in a cell
+	# spanning both rows; each row then carries its status label, count
+	# and percentage.
+	#
+	# type:: category label (String), inserted as-is.
+	# sure_magnitude:: count shown on the 'Sure' row.
+	# putative_magnitude:: count shown on the 'Putative' row.
+	# total:: 100% reference handed to sub_row for both rows.
+	def fused_row(type, sure_magnitude, putative_magnitude, total)
+		html = []
+		# Open the first row explicitly so every </tr> has a matching <tr>.
+		html << '<tr>'
+		html << '<td rowspan="2" align="left">'+type+'</td>'
+		html <<	seq_status('Sure')
+		html.concat(sub_row(sure_magnitude, total))
+		html << '</tr>'
+		html << '<tr>'
+		html <<	seq_status('Putative')
+		html.concat(sub_row(putative_magnitude, total))
+		html << '</tr>'
+		return html
+	end
+	# Returns the left-aligned cell holding a status label.
+	#
+	# status:: label text (String), inserted as-is.
+	def seq_status(status)
+		'<td align="left">' + status + '</td>'
+	end
+	# Returns the two right-aligned cells that end a row (array of HTML
+	# lines): the count and its percentage of total with two decimals.
+	# The percentage cell shows '-' when total is nil or when the ratio is
+	# not a finite number (total of zero).
+	#
+	# magnitude:: numeric count (or any object when total is nil); shown
+	#             through to_s.
+	# total:: 100% reference, or nil when a percentage makes no sense.
+	def sub_row(magnitude, total)
+		if !total.nil?
+			perc_float = magnitude*100.0/total
+			# finite? rejects NaN (0/0) and also +/-Infinity (x/0), which
+			# cannot be rendered as a percentage.
+			if perc_float.finite?
+				percentage = '%.2f' % perc_float
+				percentage += '%'
 			else
-				File.open('fln_results/chimeric_sequences.txt').each do |line|
-					line.chomp!
-					if (!line.empty?)
-						(name,fasta_length,acc,db_name,status) = line.split("\t")
-						if (status == 'Putative chimera')
-							if (fasta_length.to_i > longest_one)
-								longest_one = fasta_length.to_i
-							end
-							# -------------------------------------------------------------------------
-							if (fasta_length.to_i >= size_filter1)
-								uni_200 += 1
-							end
-							if (fasta_length.to_i >= size_filter2)
-								uni_500 += 1
-							end
-							# -------------------------------------------------------------------------
-							if (db_name =~ /^sp_/)
-								db_usage[1] += 1
-							elsif (db_name =~ /^tr_/)
-								db_usage[2] += 1
-							else
-								db_usage[0] += 1
-							end
-							# -------------------------------------------------------------------------
-							ch_total += 1
-						end
-					end
-				end
-				db_usage.each_with_index do |db,i|
-					db_usage[i] = db/2
-				end
-				return [(ch_total/2), (uni_500/2), (uni_200/2), longest_one, db_usage]
+				percentage ='-'
 			end
-		end
+		else
+			percentage = '-'
+		end
+		html = []
+		html << '<td align="right">'+magnitude.to_s+'</td>'
+		html << '<td align="right">'+percentage+'</td>'
+		return html
+	end
+	# Returns one table row (array of HTML lines) whose label cell spans
+	# two columns, followed by the count and percentage cells from sub_row.
+	#
+	# type:: row label (String), inserted as-is.
+	# magnitude:: value shown in the count cell.
+	# total:: 100% reference handed to sub_row.
+	def composed_single_row(type, magnitude, total)
+		label_cell = '<td colspan="2" align="left">'+type+'</td>'
+		['<tr>', label_cell, *sub_row(magnitude, total), '</tr>']
+	end
+	# Writes every element of html to html_file with +puts+, in order.
+	#
+	# html:: array of markup lines.
+	# html_file:: object responding to +puts+.
+	def write_array_html(html, html_file)
+		html.map do |markup|
+			html_file.puts(markup)
+		end
+	end
 end