full_lengther_next 0.6.2 → 0.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/{README.rdoc → README.md} +0 -0
- data/Rakefile +6 -37
- data/bin/console +14 -0
- data/bin/download_fln_dbs.rb +2 -7
- data/bin/full_lengther_next +85 -6
- data/bin/make_user_db.rb +13 -5
- data/bin/setup +8 -0
- data/full_lengther_next.gemspec +42 -0
- data/lib/full_lengther_next.rb +2 -10
- data/lib/full_lengther_next/artifacts.rb +74 -0
- data/lib/full_lengther_next/{classes/blast_functions.rb → blast_functions.rb} +0 -0
- data/lib/full_lengther_next/{classes/cdhit.rb → cdhit.rb} +0 -0
- data/lib/full_lengther_next/{classes/chimeric_seqs.rb → chimeric_seqs.rb} +0 -0
- data/lib/full_lengther_next/{classes/common_functions.rb → common_functions.rb} +0 -0
- data/lib/full_lengther_next/{classes/exonerate_result.rb → exonerate_result.rb} +0 -0
- data/lib/full_lengther_next/{classes/fl_analysis.rb → fl_analysis.rb} +0 -0
- data/lib/full_lengther_next/{classes/fl_string_utils.rb → fl_string_utils.rb} +0 -0
- data/lib/full_lengther_next/fln_stats.rb +613 -0
- data/lib/full_lengther_next/go_methods.rb +42 -0
- data/lib/full_lengther_next/{classes/handle_db.rb → handle_db.rb} +0 -0
- data/lib/full_lengther_next/mapping.rb +296 -0
- data/lib/full_lengther_next/{classes/my_worker.rb → my_worker.rb} +71 -9
- data/lib/full_lengther_next/{classes/my_worker_EST.rb → my_worker_EST.rb} +0 -0
- data/lib/full_lengther_next/{classes/my_worker_manager_EST.rb → my_worker_manager_EST.rb} +0 -0
- data/lib/full_lengther_next/{classes/my_worker_manager_fln.rb → my_worker_manager_fln.rb} +181 -16
- data/lib/full_lengther_next/{classes/nc_rna.rb → nc_rna.rb} +0 -0
- data/lib/full_lengther_next/{classes/orf.rb → orf.rb} +0 -0
- data/lib/full_lengther_next/{classes/reptrans.rb → reptrans.rb} +9 -5
- data/lib/full_lengther_next/{classes/sequence.rb → sequence.rb} +26 -1
- data/lib/full_lengther_next/{classes/test_code.rb → test_code.rb} +1 -1
- data/lib/full_lengther_next/{classes/types.rb → types.rb} +3 -2
- data/lib/full_lengther_next/{classes/une_los_hit.rb → une_los_hit.rb} +0 -0
- data/lib/full_lengther_next/version.rb +3 -0
- data/lib/full_lengther_next/{classes/warnings.rb → warnings.rb} +0 -0
- data/report_templates/general_summary.erb +140 -0
- data/report_templates/mapping_summary.erb +98 -0
- data/report_templates/reptrans_summary.erb +32 -0
- metadata +112 -134
- data/.gemtest +0 -0
- data/History.txt +0 -32
- data/Manifest.txt +0 -44
- data/PostInstall.txt +0 -6
- data/bin/plot_fln.rb +0 -270
- data/bin/plot_taxonomy.rb +0 -70
- data/lib/expresscanvas.zip +0 -0
- data/lib/full_lengther_next/classes/artifacts.rb +0 -66
- data/lib/full_lengther_next/classes/fln_stats.rb +0 -641
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
- data/test/test_full_lengther_next.rb +0 -11
- data/test/test_helper.rb +0 -3
    
        data/lib/full_lengther_next.rb
    CHANGED
    
    | @@ -1,13 +1,5 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
            $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
         | 
| 3 | 
            -
             | 
| 4 | 
            -
            # ROOT_PATH=File.join(File.dirname(__FILE__),'full_lengther_next')
         | 
| 5 | 
            -
             | 
| 6 | 
            -
            $: << File.expand_path(File.join(File.dirname(__FILE__), 'full_lengther_next', 'classes'))
         | 
| 7 | 
            -
             | 
| 1 | 
            +
            require "full_lengther_next/version"
         | 
| 8 2 |  | 
| 9 3 | 
             
            module FullLengtherNext
         | 
| 10 | 
            -
               | 
| 11 | 
            -
              
         | 
| 12 | 
            -
              FULL_LENGHTER_VERSION = VERSION
         | 
| 4 | 
            +
              # Your code goes here...
         | 
| 13 5 | 
             
            end
         | 
| @@ -0,0 +1,74 @@ | |
| 1 | 
            +
            require 'blast_functions'
         | 
| 2 | 
            +
            require 'types'
         | 
| 3 | 
            +
            require 'chimeric_seqs'
         | 
| 4 | 
            +
            include ChimericSeqs
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            #####################################################################
         | 
| 7 | 
            +
            ## MAIN FUNCTION
         | 
| 8 | 
            +
            #####################################################################
         | 
| 9 | 
            +
            # Decides whether +seq+ must be treated as an assembly artifact and tags
            # it accordingly. Checks run in this order and later checks are skipped
            # once the sequence has been flagged:
            #   1. no query and seq.unmapped?            -> type UNMAPPED, hit cleared
            #   2. misassembled_detection(query)         -> type MISASSEMBLED, warning 'ERROR#1'
            #   3. multiple_hsps(query, 3)               -> warning 'ERROR#3' only (not an artifact)
            #   4. search_chimeras(...) returns non-nil  -> type CHIMERA
            #
            # seq      - sequence object; modified in place (hit, type, warnings,
            #            db_name, save_fasta, ignore).
            # query    - BLAST query result for +seq+, or nil when there is none.
            # db_name  - database name stored on flagged sequences.
            # db_path  - passed through to search_chimeras.
            # options  - Hash; options[:chimera] must respond to include?. The chimera
            #            search is skipped when it includes 'd'.
            # new_seqs - Array that receives whatever search_chimeras returns.
            #
            # Returns true when +seq+ was flagged as an artifact, false otherwise.
            def artifact?(seq, query, db_name, db_path, options, new_seqs)
            	# Literal true/false are used instead of the TRUE/FALSE constants, which
            	# are deprecated since Ruby 2.4 and no longer defined on current Rubies.
            	artifact = false
            	# UNMAPPED CONTIG DETECTION
            	if query.nil? && seq.unmapped? # Without a query, an unmapped contig is flagged directly
            		seq.hit = nil
            		artifact = true
            		seq.type = UNMAPPED
            	end

            	if !query.nil?
            		# MISASSEMBLED DETECTION
            		if !artifact && misassembled_detection(query) # If seq is misassembled, stop chimera analysis
            			seq.hit = query.hits.first
            			artifact = true
            			seq.type = MISASSEMBLED
            			seq.warnings('ERROR#1')
            		end

            		# OVERLAPPING HSPS ON SUBJECT DETECTION
            		# (disabled check, kept for reference)
            		#	if !artifact
            		#		hit_reference = query.hits.first.dup
            		#		query, overlapping = overlapping_hsps_on_subject(query)
            		#		if overlapping
            		#			if query.hits.first.nil?
            		#				seq.hit = hit_reference
            		#			else
            		#				seq.hit = query.hits.first
            		#			end
            		#			artifact = true
            		#			seq.type = OTHER
            		#			seq.warnings('ERROR#2')
            		#		end
            		#	end

            		# MULTIPLE HSP DETECTION
            		# Only records a warning; the sequence is not marked as an artifact.
            		if !artifact && multiple_hsps(query, 3)
            			seq.hit = query.hits.first
            			seq.warnings('ERROR#3')
            		end

            		# CHIMERA DETECTION
            		if !artifact && !options[:chimera].include?('d')
            			chimera = search_chimeras(seq, query, options, db_name, db_path)
            			if !chimera.nil?
            				new_seqs.concat(chimera)
            				seq.db_name = db_name
            				seq.type = CHIMERA
            				artifact = true
            			end
            		end
            	end
            	if artifact
            		if $verbose > 1
            			puts seq.prot_annot_calification
            		end
            		# Flagged sequences are excluded from the FASTA output and from later analysis steps.
            		seq.db_name = db_name
            		seq.save_fasta = false
            		seq.ignore = true
            	end
            	return artifact
            end
         | 
| 72 | 
            +
             | 
| 73 | 
            +
             | 
| 74 | 
            +
             | 
| 
            File without changes
         | 
| 
            File without changes
         | 
| 
            File without changes
         | 
| 
            File without changes
         | 
| 
            File without changes
         | 
| 
            File without changes
         | 
| 
            File without changes
         | 
| @@ -0,0 +1,613 @@ | |
| 1 | 
            +
            require 'report_html'
         | 
| 2 | 
            +
            require 'types.rb'
         | 
| 3 | 
            +
            require 'go_methods'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module FlnStats
         | 
| 6 | 
            +
            	REPORT_FOLDER = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'report_templates'))
         | 
| 7 | 
            +
            	# Creates a fresh table of general FLN counters.
            	#
            	# Returns a new Hash whose keys are the counter names listed below (in
            	# that insertion order) and whose values all start at 0.
            	def initialize_stats_hash
            		counter_names = %w[
            			input_seqs output_seqs failed
            			full_transcriptome_length PRE_FLN_full_transcriptome_length
            			mean_length PRE_FLN_mean_length
            			indeterminations PRE_FLN_indeterminations
            			gap_number PRE_FLN_gap_number
            			indetermination_mean_length PRE_FLN_indetermination_mean_length
            			sequences_>200 sequences_>500 PRE_FLN_sequences_>500
            			longest_unigene
            			n50 PRE_FLN_n50 n90 PRE_FLN_n90
            			good_seqs
            			artifacts misassembled chimeras unmapped other_artifacts
            			unknown unknown_>200 unknown_>500
            			prot_annotated
            			complete complete_sure complete_putative
            			n_terminal n_terminal_sure n_terminal_putative
            			c_terminal c_terminal_sure c_terminal_putative
            			internal
            			swissprot trembl userdb
            			ncrna
            			coding coding_sure coding_putative coding_>200 coding_>500
            			different_orthologues different_completes
            			BA_index
            		]
            		stats_hash = {}
            		counter_names.each{|counter| stats_hash[counter] = 0}
            		return stats_hash
            	end
         | 
| 66 | 
            +
             | 
| 67 | 
            +
            	# Extracts the organism (its first two words) from a hit description and
            	# increments that organism's counter in +taxonomy+.
            	#
            	# Description layouts are tried in this order:
            	#   * contains 'OS='             -> text between 'OS=' and ' GN='
            	#   * starts with 'sp=' or 'tr=' -> two words followed by one or two
            	#                                   parenthesised groups
            	#   * anything else              -> text after the first ';' up to the first '.'
            	#
            	# name     - String description; it is never modified.
            	# taxonomy - Hash organism => count, updated in place.
            	#
            	# Returns the updated count, or nil when no organism could be extracted.
            	def get_taxonomy(name, taxonomy)
            		organism = nil
            		if name.include?('OS=')
            			fields = name.split('OS=',2)
            			# to_s guards against an empty fragment after 'OS=' (split would yield no fields)
            			organism = fields.last.split(' GN=').first.to_s.strip
            		elsif name[0..2] == 'sp=' || name[0..2] == 'tr='
            			# Comparison, not assignment: '=' here used to overwrite the first three
            			# characters of the caller's string and made the branch below unreachable.
            			organism = name[/(\w+ \w+) \(([\w ]+)\) \(([\w ]+)\)/, 1]
            			organism = name[/(\w+ \w+) \(([\w ]+)\)/, 1] if organism.nil?
            		else
            			organism = name.split(";",2).last.to_s
            			organism = organism.split('.', 2).first.to_s
            			organism = organism.gsub(/\(\D+\)/,'')
            			if organism.split(' ').length > 1
            				organism = organism.gsub('.','')
            				organism = organism.gsub(/^ /,'')
            				organism = organism.gsub('  ','')
            				organism = organism.strip
            			end
            		end
            		return nil if organism.nil?

            		organism = organism.split(' ')[0..1].join(' ')
            		return nil if organism.empty? # nothing usable was extracted; do not count a blank organism

            		if taxonomy[organism].nil?
            			taxonomy[organism] = 1
            		else
            			taxonomy[organism] += 1
            		end
            	end
         | 
| 102 | 
            +
             | 
| 103 | 
            +
            	# Creates a fresh table of RepTrans counters.
            	#
            	# Returns a new Hash with the keys listed below (in that insertion
            	# order), every value starting at 0.
            	def initialize_stats_hash_reptrans
            		stats_hash = {}
            		[
            			'prot_annotated',
            			'est_annotated',
            			'coding_>1',
            			'coding_>0.94',
            			'coding_>0.84',
            			'coding_>0.73',
            			'coding_>0'
            		].each{|counter| stats_hash[counter] = 0}
            		return stats_hash
            	end
         | 
| 115 | 
            +
             | 
| 116 | 
            +
            	# Extract sequence stats
         | 
| 117 | 
            +
            	##################################################
         | 
| 118 | 
            +
            	# Adds one input sequence to the PRE_FLN_* counters.
            	#
            	# seq        - object whose seq_fasta returns the nucleotide String.
            	# stats_hash - counter Hash, updated in place.
            	#
            	# Returns the updated 'PRE_FLN_gap_number' value (the last assignment).
            	def sequence_stats(seq, stats_hash)
            		nucleotides = seq.seq_fasta
            		total_bases = nucleotides.length
            		stats_hash['input_seqs'] += 1
            		# Note: a sequence of exactly 500 nt is counted here (>=), despite the key name.
            		if total_bases >= 500
            			stats_hash['PRE_FLN_sequences_>500'] += 1
            		end
            		stats_hash['PRE_FLN_full_transcriptome_length'] += total_bases
            		# 'nN' is a character set: counts every n or N in the sequence
            		stats_hash['PRE_FLN_indeterminations'] += nucleotides.count('nN')
            		# each uninterrupted run of n/N counts as one gap
            		stats_hash['PRE_FLN_gap_number'] += nucleotides.scan(/[nN]+/).length
            	end
         | 
| 126 | 
            +
             | 
| 127 | 
            +
            	# Build final stats
         | 
| 128 | 
            +
            	####################################################
         | 
| 129 | 
            +
            	# Accumulates the statistics of one batch of processed sequences into
            	# +stats_hash+ and into the running id/length collections.
            	#
            	# seqs                    - Array of sequence objects (may be empty). Each one is
            	#                           read through type, seq_fasta, fasta_length, status,
            	#                           db_name, seq_name and hit.acc, depending on its type.
            	# stats_hash              - counter Hash, updated in place.
            	# diff_ids_array          - Array collecting hit accessions of sequences whose
            	#                           type lies strictly between UNKNOWN and NCRNA; kept unique.
            	# diff_ids_complete_array - Array collecting hit accessions of COMPLETE
            	#                           sequences; kept unique.
            	# all_seq_lengths         - Array receiving the lengths of the "good"
            	#                           sequences (type >= UNKNOWN).
            	#
            	# Returns [stats_hash, diff_ids_array, diff_ids_complete_array, all_seq_lengths].
            	def summary_stats(seqs, stats_hash, diff_ids_array, diff_ids_complete_array, all_seq_lengths)
            		low_limit = 200
            		upper_limit = 500
            		#All seqs
            		#-----------
            		stats_hash['output_seqs'] += seqs.length
            		good_seqs = seqs.select{|s| s.type >= UNKNOWN}
            		stats_hash['good_seqs'] += good_seqs.length

            		#Indeterminations
            		# inject(0) makes an empty batch contribute 0 instead of nil
            		stats_hash['indeterminations'] += good_seqs.inject(0){|sum, s| sum + s.seq_fasta.count('n') + s.seq_fasta.count('N')}
            		stats_hash['gap_number'] += good_seqs.inject(0){|sum, s| sum + s.seq_fasta.scan(/[nN]+/).length}

            		#Longest_unigene
            		# max is nil for an empty batch; comparing nil with an Integer would raise
            		current_longest_unigene = seqs.map{|s| s.fasta_length}.max
            		if !current_longest_unigene.nil? && current_longest_unigene > stats_hash['longest_unigene']
            			stats_hash['longest_unigene'] = current_longest_unigene
            		end

            		#Load ids
            		seqs.each do |s|
            			if s.type > UNKNOWN && s.type < NCRNA
            				diff_ids_array << s.hit.acc
            			end
            		end
            		diff_ids_array.uniq!

            		#By Length
            		seq_lengths = good_seqs.map{|s| s.fasta_length }
            		all_seq_lengths.concat(seq_lengths)
            		stats_hash['full_transcriptome_length'] += seq_lengths.inject(0){|sum, n| sum + n}
            		stats_hash['sequences_>200'] += seq_lengths.count{|l| l > low_limit}
            		stats_hash['sequences_>500'] += seq_lengths.count{|l| l > upper_limit}

            		stats_hash['failed'] += seqs.count{|s| s.type == FAILED}

            		#Unknown
            		#-----------------------------
            		all_unknown = seqs.select{|s| s.type == UNKNOWN}
            		stats_hash['unknown'] += all_unknown.length

            		#By Length
            		stats_hash['unknown_>200'] += all_unknown.count{|s| s.fasta_length > low_limit}
            		stats_hash['unknown_>500'] += all_unknown.count{|s| s.fasta_length > upper_limit}

            		#Artifacts
            		#----------------
            		stats_hash['artifacts'] += seqs.count{|s| s.type < UNKNOWN && s.type > FAILED}
            		stats_hash['misassembled'] += seqs.count{|s| s.type == MISASSEMBLED}
            		stats_hash['unmapped'] += seqs.count{|s| s.type == UNMAPPED}
            		# Pieces named '*_split_*' are skipped so a multiple chimera is not counted more than once
            		stats_hash['chimeras'] += seqs.count{|s| s.type == CHIMERA && !s.seq_name.include?('_split_')}
            		stats_hash['other_artifacts'] += seqs.count{|s| s.type == OTHER}

            		#Annotated with prot
            		#---------------------
            		prot_annotated = seqs.select{|s| s.type >= COMPLETE && s.type <= INTERNAL}
            		stats_hash['prot_annotated'] += prot_annotated.length

            		#By annotation
            		stats_hash['internal'] += seqs.count{|s| s.type == INTERNAL}
            		complete = seqs.select{|s| s.type == COMPLETE}
            		n_terminal = seqs.select{|s| s.type == N_TERMINAL}
            		c_terminal = seqs.select{|s| s.type == C_TERMINAL}

            		stats_hash['complete'] += complete.length
            		stats_hash['n_terminal'] += n_terminal.length
            		stats_hash['c_terminal'] += c_terminal.length

            		#Load  complete ids
            		complete.each{|s| diff_ids_complete_array << s.hit.acc}
            		diff_ids_complete_array.uniq!

            		#----> By Status (truthy status = sure, falsy status = putative)
            		stats_hash['complete_sure'] += complete.count{|s| s.status}
            		stats_hash['n_terminal_sure'] += n_terminal.count{|s| s.status}
            		stats_hash['c_terminal_sure'] += c_terminal.count{|s| s.status}
            		stats_hash['complete_putative'] += complete.count{|s| !s.status}
            		stats_hash['n_terminal_putative'] += n_terminal.count{|s| !s.status}
            		stats_hash['c_terminal_putative'] += c_terminal.count{|s| !s.status}

            		#By database (db_name prefix: 'sp_' swissprot, 'tr_' trembl, anything else user db)
            		swissprot = prot_annotated.count{|s| s.db_name =~ /^sp_/}
            		trembl = prot_annotated.count{|s| s.db_name =~ /^tr_/}
            		stats_hash['swissprot'] += swissprot
            		stats_hash['trembl'] += trembl
            		stats_hash['userdb'] += prot_annotated.length - swissprot - trembl

            		#ncRNA
            		#----------------
            		stats_hash['ncrna'] += seqs.count{|s| s.type == NCRNA}

            		#Coding sequences
            		#----------------
            		coding = seqs.select{|s| s.type == CODING}
            		stats_hash['coding'] += coding.length

            		#By Status
            		stats_hash['coding_sure'] += coding.count{|s| s.status}
            		stats_hash['coding_putative'] += coding.count{|s| !s.status}

            		#By Length
            		stats_hash['coding_>200'] += coding.count{|s| s.fasta_length > low_limit}
            		stats_hash['coding_>500'] += coding.count{|s| s.fasta_length > upper_limit}

            		return stats_hash, diff_ids_array, diff_ids_complete_array, all_seq_lengths
            	end
         | 
| 239 | 
            +
             | 
| 240 | 
            +
            	# Computes the N50 and N90 lengths and stores them in +stats_hash+.
            	#
            	# Lengths are visited from longest to shortest while a running total is
            	# kept. N50 is the length at which the running total first exceeds 50% of
            	# stats_hash[f_tot_key]; N90 is the length at which it first exceeds 90%.
            	#
            	# stats_hash  - counter Hash; stats_hash[n50_key] is only written while it
            	#               is still 0, stats_hash[n90_key] is written once.
            	# f_tot_key   - key of the total length used as denominator.
            	# n50_key     - key that receives the N50 value.
            	# n90_key     - key that receives the N90 value.
            	# seq_lengths - Array of lengths; sorted in place, longest first.
            	def calculate_n50_n90(stats_hash, f_tot_key, n50_key, n90_key, seq_lengths)
            		f_tot_lengths = stats_hash[f_tot_key].to_f
            		cum = 0
            		seq_lengths.sort!{|a, b| b <=> a}
            		seq_lengths.each do |length|
            			cum += length
            			covered = cum / f_tot_lengths
            			if covered > 0.5 && stats_hash[n50_key] == 0
            				stats_hash[n50_key] = length
            			end
            			# Checked independently of the N50 test: a single length may cross both
            			# thresholds at once, and N90 must then be that same length.
            			if covered > 0.9
            				stats_hash[n90_key] = length
            				break
            			end
            		end
            	end
         | 
| 254 | 
            +
             | 
| 255 | 
            +
            	# Derives the final figures once every batch has been accumulated:
            	# distinct id counts, mean lengths, N50/N90 (via calculate_n50_n90) and
            	# the BA index.
            	#
            	# stats_hash              - counter Hash, updated in place.
            	# diff_ids_array          - Array whose length becomes 'different_orthologues'.
            	# diff_ids_complete_array - Array whose length becomes 'different_completes'.
            	# pre_fln_seq_lengths     - lengths passed to the PRE_FLN N50/N90 calculation.
            	# seq_lengths             - lengths passed to the N50/N90 calculation.
            	#
            	# Returns stats_hash.
            	def last_stats(stats_hash, diff_ids_array, diff_ids_complete_array, pre_fln_seq_lengths, seq_lengths)
            		stats_hash['different_orthologues'] = diff_ids_array.length
            		stats_hash['different_completes'] = diff_ids_complete_array.length

            		# [result key, numerator key, denominator key]; a ratio is only written
            		# when its denominator is positive, otherwise the stored value is kept.
            		ratios = [
            			['mean_length', 'full_transcriptome_length', 'good_seqs'],
            			['indetermination_mean_length', 'indeterminations', 'gap_number'],
            			['PRE_FLN_mean_length', 'PRE_FLN_full_transcriptome_length', 'input_seqs'],
            			['PRE_FLN_indetermination_mean_length', 'PRE_FLN_indeterminations', 'PRE_FLN_gap_number']
            		]
            		ratios.each do |result_key, numerator_key, denominator_key|
            			next unless stats_hash[denominator_key] > 0
            			stats_hash[result_key] = stats_hash[numerator_key].to_f / stats_hash[denominator_key]
            		end

            		calculate_n50_n90(stats_hash, 'full_transcriptome_length', 'n50', 'n90', seq_lengths)
            		calculate_n50_n90(stats_hash, 'PRE_FLN_full_transcriptome_length', 'PRE_FLN_n50', 'PRE_FLN_n90', pre_fln_seq_lengths)

            		#BA index: only computed when all of its inputs are positive
            		ba_inputs = ['prot_annotated', 'complete', 'sequences_>500', 'different_orthologues', 'different_completes']
            		if ba_inputs.all?{|key| stats_hash[key] > 0}
            			annotated = stats_hash['prot_annotated']
            			completes = stats_hash['complete']
            			coef_anot_geom = (annotated * completes * 1.0)/(stats_hash['sequences_>500']*10000)
            			coef_mejora = (stats_hash['different_orthologues']*1.0 + stats_hash['different_completes'])/(annotated + completes)
            			stats_hash['BA_index'] = Math.sqrt(coef_anot_geom*coef_mejora)
            		end

            		return stats_hash
            	end
         | 
| 279 | 
            +
             | 
| 280 | 
            +
            	# Counts a sequence in the test-code bucket matching its score.
            	#
            	# coding_seq:: object exposing a numeric +t_code+.
            	# stats_hash:: counters keyed by bucket name; the matching bucket is incremented in place.
            	#
            	# Returns the updated counter, or nil when the score is not above 0 (no bucket applies).
            	# NOTE(review): the 0.95 and 0.85 bounds feed the keys 'coding_>0.94' and 'coding_>0.84' --
            	# confirm the label/threshold mismatch is intended.
            	def coding_stats_reptrans(coding_seq, stats_hash)
            		score = coding_seq.t_code
            		# [exclusive lower bound, counter key], checked from the highest bound down
            		buckets = [
            			[1, 'coding_>1'],
            			[0.95, 'coding_>0.94'],
            			[0.85, 'coding_>0.84'],
            			[0.73, 'coding_>0.73'],
            			[0, 'coding_>0']
            		]
            		match = buckets.find { |floor, _key| score > floor }
            		return if match.nil?
            		stats_hash[match.last] += 1
            	end
         | 
| 297 | 
            +
             | 
| 298 | 
            +
            	# Assembles every table used by the main FLN summary report.
            	#
            	# stats_hash:: counters and metrics keyed by name (read only).
            	# stats_taxonomy:: organism => annotation count pairs; the 21 highest counts are kept.
            	# stats_functional_annotation_by_seqs:: passed untouched to go_for_graph.
            	#
            	# Returns the hash produced by go_for_graph extended with :general_report,
            	# :assembly_report, :structural_data, :status_report, :taxonomy and :database_report.
            	def handle_data_main_summary(stats_hash, stats_taxonomy, stats_functional_annotation_by_seqs)
            		indent = '     '
            		deep_indent = indent * 2

            		input_seqs = stats_hash['input_seqs']
            		output_seqs = stats_hash['output_seqs']
            		good_seqs = stats_hash['good_seqs']
            		artifacts = stats_hash['artifacts']
            		pre_fln_length = stats_hash['PRE_FLN_full_transcriptome_length']
            		final_length = stats_hash['full_transcriptome_length']
            		prot_annotated = stats_hash['prot_annotated']
            		coding = stats_hash['coding']
            		unknown = stats_hash['unknown']
            		ncrna = stats_hash['ncrna']
            		without_orthologue = coding + unknown

            		# General report: each entry is [label, value, percentage denominator].
            		# A denominator of 0 makes the percentage column show '-'.
            		general_rows = [
            			['Input', input_seqs, input_seqs],
            			[indent + 'N50 (bp)', stats_hash['PRE_FLN_n50'], 0],
            			[indent + 'N90 (bp)', stats_hash['PRE_FLN_n90'], 0],
            			[indent + 'Full transcriptome length (bp)', pre_fln_length, 0],
            			[indent + 'Mean sequence length (bp)', '%.2f' % stats_hash['PRE_FLN_mean_length'], 0],
            			[indent + 'Nucleotide indeterminations (bp)', stats_hash['PRE_FLN_indeterminations'], pre_fln_length],
            			[indent + 'Mean indetermination length (bp)', '%.2f' % stats_hash['PRE_FLN_indetermination_mean_length'], 0],
            			[indent + 'Unigenes >500pb', stats_hash['PRE_FLN_sequences_>500'], input_seqs],
            			[indent + 'Failing sequences', stats_hash['failed'], output_seqs],
            			[indent + 'Artifacts <sup>1</sup>', artifacts, output_seqs],
            			[deep_indent + 'Unmapped transcripts', stats_hash['unmapped'], artifacts],
            			[deep_indent + 'Misassembled', stats_hash['misassembled'], artifacts],
            			[deep_indent + 'Chimeras', stats_hash['chimeras'], artifacts],
            			[deep_indent + 'Other', stats_hash['other_artifacts'], artifacts],
            			['Sequences with resolved chimeras', output_seqs, input_seqs],
            			['Sequences without artifacts', good_seqs, output_seqs],
            			[indent + 'N50 (bp)', stats_hash['n50'], 0],
            			[indent + 'N90 (bp)', stats_hash['n90'], 0],
            			[indent + 'Full transcriptome length (bp)', final_length, 0],
            			[indent + 'Mean sequence length (bp)', '%.2f' % stats_hash['mean_length'], 0],
            			[indent + 'Nucleotide indeterminations (bp)', stats_hash['indeterminations'], final_length],
            			[indent + 'Mean indetermination length (bp)', '%.2f' % stats_hash['indetermination_mean_length'], 0]
            		]
            		general_report = [['', 'Sequences', '%']]
            		general_rows.each { |label, value, _den| general_report << [label, value] }
            		add_percentages_by_vector(general_report, 1, general_rows.map { |entry| entry.last })
            		ba_index = stats_hash['BA_index']
            		general_report << ['BA index', format('%5.2f', ba_index), '-'] if ba_index > 0

            		# Assembly report: same [label, value, denominator] layout.
            		assembly_rows = [
            			['Unigenes', good_seqs, good_seqs],
            			['Unigenes >500pb', stats_hash['sequences_>500'], good_seqs],
            			['Unigenes >200pb', stats_hash['sequences_>200'], good_seqs],
            			['Longest unigene', stats_hash['longest_unigene'], 0],
            			['With orthologue <sup>1</sup>', prot_annotated, good_seqs],
            			[indent + 'Different orthologue IDs', stats_hash['different_orthologues'], prot_annotated],
            			[indent + 'Complete transcripts', stats_hash['complete'], prot_annotated],
            			[indent + 'Different complete transcripts', stats_hash['different_completes'], prot_annotated],
            			['ncRNA', ncrna, good_seqs],
            			['Without orthologue <sup>1</sup>', without_orthologue, good_seqs],
            			[indent + 'Coding (all)', coding, without_orthologue],
            			[indent + 'Coding > 200bp', stats_hash['coding_>200'], without_orthologue],
            			[indent + 'Coding > 500bp', stats_hash['coding_>500'], without_orthologue],
            			[indent + 'Unknown (all)', unknown, without_orthologue],
            			[indent + 'Unknown > 200bp', stats_hash['unknown_>200'], without_orthologue],
            			[indent + 'Unknown > 500bp', stats_hash['unknown_>500'], without_orthologue]
            		]
            		assembly_report = [['', 'Unigenes', '%']]
            		assembly_rows.each { |label, value, _den| assembly_report << [label, value] }
            		add_percentages_by_vector(assembly_report, 1, assembly_rows.map { |entry| entry.last })

            		# Structural profile: positive counts become percentages of good_seqs, zeros stay 0.
            		structural_data = [
            			['Category', 'Sure', 'Putative'],
            			['Unknown', unknown, 0],
            			['Complete', stats_hash['complete_sure'], stats_hash['complete_putative']],
            			['N-terminal', stats_hash['n_terminal_sure'], stats_hash['n_terminal_putative']],
            			['C-terminal', stats_hash['c_terminal_sure'], stats_hash['c_terminal_putative']],
            			['Internal', stats_hash['internal'], 0],
            			['ncrna', ncrna, 0],
            			['Coding', coding, stats_hash['coding_putative']]
            		]
            		structural_data.drop(1).each do |row|
            			(1...row.length).each do |column|
            				row[column] = row[column] * 100.0 / good_seqs if row[column] > 0
            			end
            		end

            		# Status report: 'colspan'/'rowspan' cells are placeholders kept verbatim for the template.
            		status_report = [
            			['Status', 'colspan', 'Unigenes', '%'],
            			['Complete', 'Sure', stats_hash['complete_sure']],
            			['rowspan', 'Putative', stats_hash['complete_putative']],
            			['C-terminus', 'Sure', stats_hash['c_terminal_sure']],
            			['rowspan', 'Putative', stats_hash['c_terminal_putative']],
            			['N-terminus', 'Sure', stats_hash['n_terminal_sure']],
            			['rowspan', 'Putative', stats_hash['n_terminal_putative']],
            			['Internal', 'colspan', stats_hash['internal']],
            			['Coding', 'Sure', stats_hash['coding_sure']],
            			['rowspan', 'Putative', stats_hash['coding_putative']],
            			['ncRNA', 'colspan', ncrna],
            			['Unknown', 'colspan', unknown],
            			['Total', 'colspan', good_seqs]
            		]
            		add_percentages_by_scalar(status_report, 2, good_seqs)

            		# Taxonomy profile: organisms ordered by descending annotation count, first 21 kept.
            		ranked_organisms = stats_taxonomy.to_a.sort { |left, right| right.last <=> left.last }
            		taxonomy = [['Organism', 'Annotations']] + ranked_organisms.first(21)

            		# Database report: unigenes per annotation source.
            		database_report = [
            			['', 'Unigenes', '%'],
            			['UserDB', stats_hash['userdb']],
            			['SwissProt', stats_hash['swissprot']],
            			['TrEMBL', stats_hash['trembl']],
            			['ncRNA', ncrna],
            			['None', without_orthologue],
            			['Total', good_seqs]
            		]
            		add_percentages_by_scalar(database_report, 1, good_seqs)

            		# GO data goes in first so the tables below win on any key clash.
            		container = {}
            		container.merge!(go_for_graph(stats_functional_annotation_by_seqs))
            		container.update(
            			:general_report => general_report,
            			:assembly_report => assembly_report,
            			:structural_data => structural_data,
            			:status_report => status_report,
            			:taxonomy => taxonomy,
            			:database_report => database_report
            		)
            		container
            	end
         | 
| 468 | 
            +
             | 
| 469 | 
            +
             | 
| 470 | 
            +
            	# Builds the tables for the RepTrans summary report.
            	#
            	# stats_hash:: counters keyed by category name; all of its values are summed
            	#              to obtain the 'Output' total used as percentage denominator.
            	#
            	# Returns a hash with two tables, each an array of rows headed by a title row:
            	# :general_report::    one [label, count, percentage] row per category plus 'Output'.
            	# :acumulative_report:: one [label, running total, percentage] row per category.
            	def handle_data_reptrans_summary(stats_hash)
            		all_seqs = stats_hash.values.inject(0) { |total, count| total + count }

            		# Label and counter key kept together so the two tables cannot drift apart.
            		categories = [
            			['Annotated with protein', 'prot_annotated'],
            			['Annotated with EST', 'est_annotated'],
            			['Coding test-code > 1', 'coding_>1'],
            			['Coding test-code > 0.94', 'coding_>0.94'],
            			['Coding test-code > 0.84', 'coding_>0.84'],
            			['Coding test-code > 0.73', 'coding_>0.73'],
            			['Coding test-code > 0', 'coding_>0']
            		]

            		general_report = [
            			['', 'Sequences', '%'],
            			['Output', all_seqs]
            		]
            		acumulative_report = [
            			['', 'Sequences', '%']
            		]

            		# Every category gets a cumulative row, even while the running total is still 0.
            		# Skipping zero partial sums shifted the labels and left nil counts in the last
            		# rows, which made the percentage step fail.
            		running_total = 0
            		categories.each do |label, key|
            			count = stats_hash[key]
            			general_report << [label, count]
            			running_total += count
            			acumulative_report << [label, running_total]
            		end

            		add_percentages_by_scalar(general_report, 1, all_seqs)
            		add_percentages_by_scalar(acumulative_report, 1, all_seqs)

            		container = {}
            		container[:general_report] = general_report
            		container[:acumulative_report] = acumulative_report
            		return container
            	end
         | 
| 529 | 
            +
             | 
| 530 | 
            +
            	# Appends a percentage cell to every data row of +table+, using one denominator per row.
            	#
            	# table:: array of rows; the first row is the header and is left untouched.
            	# col:: index of the cell holding the numerator.
            	# denominators:: one entry per data row, in row order.
            	#
            	# A row gets 'NN.NN%' when its denominator is above 0 and the ratio is a finite
            	# number, otherwise '-'. The numerator is not read when the denominator is not
            	# above 0, so such rows may hold non-numeric cells. Returns +table+.
            	def add_percentages_by_vector(table, col, denominators)
            		table.drop(1).zip(denominators) do |row, divisor|
            			cell = '-'
            			if divisor > 0
            				ratio = row[col] * 100.0 / divisor
            				cell = format('%.2f', ratio) + '%' unless ratio.nan? || ratio.infinite?
            			end
            			row << cell
            		end
            		table
            	end
         | 
| 544 | 
            +
             | 
| 545 | 
            +
            	# Appends a percentage cell to every data row of +table+, using a shared denominator.
            	#
            	# table:: array of rows; the first row is the header and is left untouched.
            	# col:: index of the cell holding the numerator.
            	# denominator:: value every numerator is divided by.
            	#
            	# A row gets 'NN.NN%' when the ratio is a finite number, otherwise '-'
            	# (for instance when the denominator is 0). Returns +table+.
            	def add_percentages_by_scalar(table, col, denominator)
            		table.drop(1).each do |row|
            			ratio = row[col] * 100.0 / denominator
            			cell = if ratio.nan? || ratio.infinite?
            				'-'
            			else
            				format('%.2f', ratio) + '%'
            			end
            			row << cell
            		end
            		table
            	end
         | 
| 558 | 
            +
             | 
| 559 | 
            +
            	# Completes the statistics, dumps them as text and renders the HTML summary.
            	#
            	# stats_hash:: counters collected so far; replaced by the result of last_stats.
            	# stats_taxonomy, stats_functional_annotation_by_seqs:: forwarded to handle_data_main_summary.
            	# diff_ids_array, diff_ids_complete_array, pre_fln_seq_lengths, seq_lengths:: forwarded to last_stats.
            	# txt_file:: handed to write_txt as-is, so it must respond to +puts+
            	#            (an open handle, not a path).
            	# html_file:: destination passed to the report's +write+.
            	def write_summary_stats(stats_hash, stats_taxonomy, stats_functional_annotation_by_seqs, diff_ids_array, diff_ids_complete_array, pre_fln_seq_lengths, seq_lengths, txt_file, html_file)
            		stats_hash = last_stats(stats_hash, diff_ids_array, diff_ids_complete_array, pre_fln_seq_lengths, seq_lengths)
            		write_txt(stats_hash, txt_file)
            		container = handle_data_main_summary(stats_hash, stats_taxonomy, stats_functional_annotation_by_seqs)
            		# File.read closes the template handle instead of leaving it open.
            		template = File.read(File.join(REPORT_FOLDER, 'general_summary.erb'))
            		report = Report_html.new(container, 'FLN Summary')
            		report.build(template)
            		report.write(html_file)
            	end
         | 
| 568 | 
            +
             | 
| 569 | 
            +
            	# Renders the mapping summary report into fln_results/mapping_summary.html.
            	#
            	# fpkm:: forwarded to go_for_graph; nothing is written when it is empty.
            	# coverage_analysis:: hash whose values are arrays read at positions 0..3;
            	#                     nothing is written when it is empty.
            	#                     NOTE(review): judging by the headers used below, position 1 is
            	#                     the mean of the 10 highest coverages, 2 the mean coverage and
            	#                     3 the transcript coverage -- confirm against the producer.
            	# stats_functional_annotation_by_seqs:: forwarded to go_for_graph.
            	#
            	# Returns nil when either input is empty, otherwise the result of the report's +write+.
            	def write_mapping_report(fpkm, coverage_analysis, stats_functional_annotation_by_seqs)
            		return if fpkm.empty? || coverage_analysis.empty?

            		container = go_for_graph(stats_functional_annotation_by_seqs, fpkm)
            		coverages = coverage_analysis.values

            		# Rows [rank, value 1, value 2], ordered by descending value 2.
            		measured_coverages = coverages.map { |c| [c[1], c[2]] }
            		measured_coverages.sort! { |c1, c2| c2[1] <=> c1[1] }
            		measured_coverages.each_with_index do |cov, i|
            			cov.unshift(i + 1) # x axis: 1, 2, 3 ... (seqs)
            		end
            		measured_coverages.unshift(%w[transcripts mean_10max mean])
            		container[:mean_coverage] = measured_coverages

            		# Rows [rank, value 1], ordered by descending value 1.
            		by_max10 = coverages.sort { |c1, c2| c2[1] <=> c1[1] }
            		container[:max10_coverage] = by_max10.each_with_index.map { |c, i| [i + 1, c[1]] }

            		container[:normalized_partial_coverage] = coverages.map { |c| [c[3], c[0]] }

            		# Rows [value 3, value 2], ordered by ascending value 3.
            		mean_cov_trasn_cov = coverages.map { |data| [data[3], data[2]] }
            		mean_cov_trasn_cov.sort! { |i1, i2| i1[0] <=> i2[0] }
            		mean_cov_trasn_cov.unshift(%w[trans_cov mean_coverage])
            		container[:normalized_coverages_sorted_by_npc] = mean_cov_trasn_cov

            		# File.read closes the template handle instead of leaving it open.
            		template = File.read(File.join(REPORT_FOLDER, 'mapping_summary.erb'))
            		report = Report_html.new(container, 'FLN Summary')
            		report.build(template)
            		report.write(File.join('fln_results', 'mapping_summary.html'))
            	end
         | 
| 592 | 
            +
            	
         | 
| 593 | 
            +
            	# Writes the RepTrans statistics as text and renders the HTML summary.
            	#
            	# stats_hash:: counters written to the text file and summarised in the report.
            	# html_file:: destination passed to the report's +write+.
            	# txt_file:: path of the text file, created or truncated.
            	def write_reptrans_stats(stats_hash, html_file, txt_file)
            		# Block form closes (and therefore flushes) the text file before the report
            		# is built; a handle that is never closed can leave the stats unwritten.
            		File.open(txt_file, 'w') { |txt| write_txt(stats_hash, txt) }
            		container = handle_data_reptrans_summary(stats_hash)
            		# File.read closes the template handle instead of leaving it open.
            		template = File.read(File.join(REPORT_FOLDER, 'reptrans_summary.erb'))
            		report = Report_html.new(container, 'FLN Reptrans Summary')
            		report.build(template)
            		report.write(html_file)
            	end
         | 
| 602 | 
            +
             | 
| 603 | 
            +
            	# Writes one "value<TAB>key" line per entry of +stats_hash+ to +file+.
            	#
            	# stats_hash:: entries to dump, in iteration order.
            	# file:: any object responding to +puts+.
            	def write_txt(stats_hash, file)
            		stats_hash.each_pair do |label, count|
            			line = "#{count}\t#{label}"
            			file.puts line
            		end
            	end
         | 
| 608 | 
            +
             | 
| 609 | 
            +
            	# Wraps +title+ in the bold heading markup placed above a report table.
            	#
            	# title:: String inserted verbatim (no escaping is applied).
            	# Returns the HTML fragment as a new String.
            	def table_title(title)
            		opening = '<div style="font-size:25px; margin: 10"><b>'
            		closing = '</b></div>'
            		opening + title + closing
            	end
         | 
| 613 | 
            +
            end
         |